From 08003c24c8ffa7ac2d62b8c7bb301d57be74b36e Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Thu, 24 Feb 2022 21:29:37 -0500
Subject: [PATCH] Moving device functions to cuh files and deprecating hpp (#524)

For consistency, we had originally swept through the primitive functions and used the `hpp` extension across the public API. However, it was brought to my attention more recently that this is confusing in the larger scope of the project, which also contains many host-only APIs that don't require a CUDA-enabled compiler. At the same time, as we're gaining more consumers, we need to start being more careful about making breaking changes to the public APIs and their header files.

For this reason, I'm opting to copy the existing `hpp` files into `cuh` files, deprecate the `hpp` files, and use `#define` guards with conditionals to make sure the contents of only one file get defined even if both are included (for example, when a user includes `filea.hpp` but RAFT internally includes `filea.cuh`); a minimal sketch of this pattern appears further below. This should allow us to pick a release in which to remove the offending `hpp` files and make an announcement, giving ample notice before the breaking change is made.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Mark Sadang (https://github.com/msadang)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/raft/pull/524
---
 build.sh | 2 +- ci/release/update-version.sh | 2 +- cpp/cmake/thirdparty/get_faiss.cmake | 2 +- cpp/include/raft/cluster/detail/kmeans.cuh | 2 +- .../raft/cluster/{kmeans.hpp => kmeans.cuh} | 2 +- cpp/include/raft/comms/comms.hpp | 2 +- cpp/include/raft/comms/comms_test.hpp | 2 +- cpp/include/raft/comms/detail/ucp_helper.hpp | 2 +- cpp/include/raft/comms/helper.hpp | 2 +- cpp/include/raft/comms/mpi_comms.hpp | 2 +- cpp/include/raft/comms/std_comms.hpp | 2 +- .../raft/distance/detail/correlation.cuh | 4 +- cpp/include/raft/distance/detail/cosine.cuh | 4 +- cpp/include/raft/distance/detail/distance.cuh | 2 +- .../raft/distance/detail/euclidean.cuh | 4 +- .../raft/distance/detail/fused_l2_nn.cuh | 4 +- .../raft/distance/detail/hellinger.cuh | 4 +- .../detail/pairwise_distance_base.cuh | 4 +- cpp/include/raft/distance/distance.cuh | 325 ++++++++++++ cpp/include/raft/distance/distance.hpp | 11 +- cpp/include/raft/distance/fused_l2_nn.cuh | 118 +++++ cpp/include/raft/distance/fused_l2_nn.hpp | 11 +- cpp/include/raft/distance/specializations.cuh | 24 + cpp/include/raft/distance/specializations.hpp | 13 +- .../detail/{canberra.hpp => canberra.cuh} | 2 +- .../detail/{chebyshev.hpp => chebyshev.cuh} | 2 +- .../{correlation.hpp => correlation.cuh} | 2 +- .../detail/{cosine.hpp => cosine.cuh} | 2 +- ..._unexpanded.hpp => hamming_unexpanded.cuh} | 2 +- ...er_expanded.hpp => hellinger_expanded.cuh} | 2 +- ...{jensen_shannon.hpp => jensen_shannon.cuh} | 2 +- .../{kl_divergence.hpp => kl_divergence.cuh} | 2 +- .../specializations/detail/{l1.hpp => l1.cuh} | 2 +- .../{l2_expanded.hpp => l2_expanded.cuh} | 2 +- ...sqrt_expanded.hpp => l2_sqrt_expanded.cuh} | 2 +- ..._unexpanded.hpp => l2_sqrt_unexpanded.cuh} | 2 +- .../{l2_unexpanded.hpp => l2_unexpanded.cuh} | 2 +- .../{lp_unexpanded.hpp => lp_unexpanded.cuh} | 2 +- .../{distance.hpp => distance.cuh} | 30 +- cpp/include/raft/label/classlabels.cuh | 121 +++++ cpp/include/raft/label/classlabels.hpp | 7 +- cpp/include/raft/label/detail/classlabels.cuh | 4 +- .../raft/label/detail/merge_labels.cuh | 4 +- cpp/include/raft/label/merge_labels.cuh | 71 +++ cpp/include/raft/label/merge_labels.hpp |
9 +- cpp/include/raft/lap/detail/d_structs.h | 2 +- cpp/include/raft/lap/detail/lap_functions.cuh | 2 +- cpp/include/raft/lap/detail/lap_kernels.cuh | 2 +- cpp/include/raft/lap/{lap.hpp => lap.cuh} | 2 +- cpp/include/raft/linalg/add.cuh | 90 ++++ cpp/include/raft/linalg/add.hpp | 9 + cpp/include/raft/linalg/axpy.cuh | 55 ++ cpp/include/raft/linalg/axpy.hpp | 11 +- cpp/include/raft/linalg/binary_op.cuh | 58 +++ cpp/include/raft/linalg/binary_op.hpp | 9 + .../raft/linalg/cholesky_r1_update.cuh | 138 ++++++ .../raft/linalg/cholesky_r1_update.hpp | 11 +- .../raft/linalg/coalesced_reduction.cuh | 76 +++ .../raft/linalg/coalesced_reduction.hpp | 9 + cpp/include/raft/linalg/contractions.cuh | 211 ++++++++ cpp/include/raft/linalg/contractions.hpp | 9 + cpp/include/raft/linalg/cublas_macros.h | 116 +++++ cpp/include/raft/linalg/cusolver_macros.h | 112 +++++ cpp/include/raft/linalg/detail/add.cuh | 4 +- .../raft/linalg/detail/{axpy.hpp => axpy.cuh} | 0 ...y_r1_update.hpp => cholesky_r1_update.cuh} | 3 +- .../raft/linalg/detail/cublas_wrappers.hpp | 1 + .../linalg/detail/{divide.hpp => divide.cuh} | 2 +- .../raft/linalg/detail/{eig.hpp => eig.cuh} | 3 +- .../detail/{eltwise.hpp => eltwise.cuh} | 4 +- .../detail/{lanczos.hpp => lanczos.cuh} | 0 .../linalg/detail/{lstsq.hpp => lstsq.cuh} | 21 +- .../raft/linalg/detail/matrix_vector_op.cuh | 2 +- ...uared_error.hpp => mean_squared_error.cuh} | 2 +- .../detail/{multiply.hpp => multiply.cuh} | 2 +- .../raft/linalg/detail/{norm.hpp => norm.cuh} | 2 +- cpp/include/raft/linalg/detail/qr.cuh | 2 +- .../linalg/detail/{reduce.hpp => reduce.cuh} | 4 +- cpp/include/raft/linalg/detail/rsvd.cuh | 16 +- .../raft/linalg/detail/strided_reduction.cuh | 2 +- cpp/include/raft/linalg/detail/subtract.cuh | 4 +- .../raft/linalg/detail/{svd.hpp => svd.cuh} | 10 +- .../detail/{transpose.hpp => transpose.cuh} | 0 cpp/include/raft/linalg/divide.cuh | 49 ++ cpp/include/raft/linalg/divide.hpp | 11 +- cpp/include/raft/linalg/eig.cuh | 120 +++++ cpp/include/raft/linalg/eig.hpp | 11 +- cpp/include/raft/linalg/eltwise.cuh | 106 ++++ cpp/include/raft/linalg/eltwise.hpp | 11 +- cpp/include/raft/linalg/gemm.cuh | 179 +++++++ cpp/include/raft/linalg/gemm.hpp | 9 + cpp/include/raft/linalg/gemv.cuh | 211 ++++++++ cpp/include/raft/linalg/gemv.hpp | 9 + cpp/include/raft/linalg/init.cuh | 60 +++ cpp/include/raft/linalg/init.hpp | 9 + cpp/include/raft/linalg/lanczos.cuh | 162 ++++++ cpp/include/raft/linalg/lanczos.hpp | 11 +- cpp/include/raft/linalg/lstsq.cuh | 121 +++++ cpp/include/raft/linalg/lstsq.hpp | 11 +- cpp/include/raft/linalg/map.cuh | 54 ++ cpp/include/raft/linalg/map.hpp | 9 + cpp/include/raft/linalg/map_then_reduce.cuh | 91 ++++ cpp/include/raft/linalg/map_then_reduce.hpp | 9 + cpp/include/raft/linalg/matrix_vector_op.cuh | 105 ++++ cpp/include/raft/linalg/matrix_vector_op.hpp | 9 + .../raft/linalg/mean_squared_error.cuh | 47 ++ .../raft/linalg/mean_squared_error.hpp | 11 +- cpp/include/raft/linalg/multiply.cuh | 47 ++ cpp/include/raft/linalg/multiply.hpp | 11 +- cpp/include/raft/linalg/norm.cuh | 94 ++++ cpp/include/raft/linalg/norm.hpp | 11 +- cpp/include/raft/linalg/power.cuh | 8 +- cpp/include/raft/linalg/power.hpp | 74 +++ cpp/include/raft/linalg/qr.cuh | 78 +++ cpp/include/raft/linalg/qr.hpp | 9 + cpp/include/raft/linalg/reduce.cuh | 81 +++ cpp/include/raft/linalg/reduce.hpp | 11 +- .../raft/linalg/reduce_cols_by_key.cuh | 4 + .../raft/linalg/reduce_cols_by_key.hpp | 62 +++ .../raft/linalg/reduce_rows_by_key.cuh | 6 +- .../raft/linalg/reduce_rows_by_key.hpp | 119 +++++ 
cpp/include/raft/linalg/rsvd.cuh | 4 + cpp/include/raft/linalg/rsvd.hpp | 148 ++++++ cpp/include/raft/linalg/sqrt.cuh | 6 +- cpp/include/raft/linalg/sqrt.hpp | 53 ++ cpp/include/raft/linalg/strided_reduction.cuh | 77 +++ cpp/include/raft/linalg/strided_reduction.hpp | 9 + cpp/include/raft/linalg/subtract.cuh | 90 ++++ cpp/include/raft/linalg/subtract.hpp | 9 + cpp/include/raft/linalg/svd.cuh | 188 +++++++ cpp/include/raft/linalg/svd.hpp | 11 +- cpp/include/raft/linalg/ternary_op.cuh | 7 +- cpp/include/raft/linalg/ternary_op.hpp | 59 +++ cpp/include/raft/linalg/transpose.cuh | 61 +++ cpp/include/raft/linalg/transpose.hpp | 11 +- cpp/include/raft/linalg/unary_op.cuh | 77 +++ cpp/include/raft/linalg/unary_op.hpp | 9 + cpp/include/raft/matrix/col_wise_sort.cuh | 56 +++ cpp/include/raft/matrix/col_wise_sort.hpp | 9 + cpp/include/raft/matrix/detail/math.cuh | 10 +- cpp/include/raft/matrix/detail/matrix.cuh | 2 +- cpp/include/raft/matrix/math.cuh | 468 ++++++++++++++++++ cpp/include/raft/matrix/math.hpp | 11 +- cpp/include/raft/matrix/matrix.cuh | 278 +++++++++++ cpp/include/raft/matrix/matrix.hpp | 9 + cpp/include/raft/mr/buffer_base.hpp | 2 +- cpp/include/raft/mr/device/buffer.hpp | 2 +- cpp/include/raft/mr/host/buffer.hpp | 2 +- cpp/include/raft/random/detail/make_blobs.cuh | 4 +- .../raft/random/detail/make_regression.cuh | 14 +- .../random/detail/multi_variable_gaussian.cuh | 4 +- cpp/include/raft/random/make_blobs.cuh | 96 ++++ cpp/include/raft/random/make_blobs.hpp | 12 +- cpp/include/raft/random/make_regression.cuh | 105 ++++ cpp/include/raft/random/make_regression.hpp | 12 +- .../raft/random/multi_variable_gaussian.cuh | 64 +++ .../raft/random/multi_variable_gaussian.hpp | 11 +- cpp/include/raft/random/permute.cuh | 63 +++ cpp/include/raft/random/permute.hpp | 11 +- cpp/include/raft/random/rng.cuh | 380 ++++++++++++++ cpp/include/raft/random/rng.hpp | 9 + cpp/include/raft/sparse/convert/coo.cuh | 46 ++ cpp/include/raft/sparse/convert/coo.hpp | 13 +- cpp/include/raft/sparse/convert/csr.cuh | 142 ++++++ cpp/include/raft/sparse/convert/csr.hpp | 13 +- cpp/include/raft/sparse/convert/dense.cuh | 67 +++ cpp/include/raft/sparse/convert/dense.hpp | 13 +- .../raft/sparse/convert/detail/coo.cuh | 2 +- .../raft/sparse/convert/detail/csr.cuh | 6 +- .../raft/sparse/convert/detail/dense.cuh | 2 +- cpp/include/raft/sparse/detail/csr.cuh | 2 +- .../raft/sparse/detail/cusparse_macros.h | 2 +- .../raft/sparse/detail/cusparse_wrappers.h | 2 +- .../sparse/distance/detail/bin_distance.cuh | 2 +- .../raft/sparse/distance/detail/coo_spmv.cuh | 2 +- .../sparse/distance/detail/ip_distance.cuh | 6 +- .../sparse/distance/detail/l2_distance.cuh | 6 +- .../sparse/distance/detail/lp_distance.cuh | 4 +- .../raft/sparse/distance/detail/utils.cuh | 2 +- cpp/include/raft/sparse/distance/distance.cuh | 137 +++++ cpp/include/raft/sparse/distance/distance.hpp | 11 +- .../sparse/hierarchy/detail/agglomerative.cuh | 2 +- .../hierarchy/detail/connectivities.cuh | 8 +- .../raft/sparse/hierarchy/detail/mst.cuh | 6 +- ...{single_linkage.hpp => single_linkage.cuh} | 2 +- .../raft/sparse/hierarchy/single_linkage.cuh | 65 +++ .../raft/sparse/hierarchy/single_linkage.hpp | 13 +- cpp/include/raft/sparse/linalg/add.cuh | 99 ++++ cpp/include/raft/sparse/linalg/add.hpp | 11 +- cpp/include/raft/sparse/linalg/degree.cuh | 123 +++++ cpp/include/raft/sparse/linalg/degree.hpp | 11 +- cpp/include/raft/sparse/linalg/detail/add.cuh | 2 +- .../raft/sparse/linalg/detail/norm.cuh | 2 +- .../raft/sparse/linalg/detail/spectral.cuh | 8 +- 
.../raft/sparse/linalg/detail/symmetrize.cuh | 8 +- .../raft/sparse/linalg/detail/transpose.h | 2 +- cpp/include/raft/sparse/linalg/norm.cuh | 73 +++ cpp/include/raft/sparse/linalg/norm.hpp | 13 +- cpp/include/raft/sparse/linalg/spectral.cuh | 43 ++ cpp/include/raft/sparse/linalg/spectral.hpp | 11 +- cpp/include/raft/sparse/linalg/symmetrize.cuh | 168 +++++++ cpp/include/raft/sparse/linalg/symmetrize.hpp | 11 +- cpp/include/raft/sparse/linalg/transpose.cuh | 74 +++ cpp/include/raft/sparse/linalg/transpose.hpp | 11 +- cpp/include/raft/sparse/mst/mst.cuh | 6 +- cpp/include/raft/sparse/mst/mst.hpp | 63 +++ cpp/include/raft/sparse/op/detail/filter.cuh | 4 +- cpp/include/raft/sparse/op/detail/reduce.cuh | 6 +- cpp/include/raft/sparse/op/detail/row_op.cuh | 2 +- .../sparse/op/detail/{slice.h => slice.cuh} | 4 +- cpp/include/raft/sparse/op/detail/sort.h | 2 +- cpp/include/raft/sparse/op/filter.cuh | 94 ++++ cpp/include/raft/sparse/op/filter.hpp | 11 +- cpp/include/raft/sparse/op/reduce.cuh | 87 ++++ cpp/include/raft/sparse/op/reduce.hpp | 11 +- cpp/include/raft/sparse/op/row_op.cuh | 48 ++ cpp/include/raft/sparse/op/row_op.hpp | 11 +- cpp/include/raft/sparse/op/slice.cuh | 81 +++ cpp/include/raft/sparse/op/slice.hpp | 13 +- cpp/include/raft/sparse/op/sort.cuh | 78 +++ cpp/include/raft/sparse/op/sort.hpp | 11 +- .../sparse/selection/connect_components.cuh | 82 +++ .../sparse/selection/connect_components.hpp | 11 +- .../selection/detail/connect_components.cuh | 14 +- .../raft/sparse/selection/detail/knn.cuh | 12 +- .../sparse/selection/detail/knn_graph.cuh | 6 +- cpp/include/raft/sparse/selection/knn.cuh | 102 ++++ cpp/include/raft/sparse/selection/knn.hpp | 11 +- .../raft/sparse/selection/knn_graph.cuh | 63 +++ .../raft/sparse/selection/knn_graph.hpp | 11 +- cpp/include/raft/spatial/knn/ann.cuh | 87 ++++ cpp/include/raft/spatial/knn/ann.hpp | 11 +- cpp/include/raft/spatial/knn/ann_common.h | 2 +- cpp/include/raft/spatial/knn/ball_cover.cuh | 192 +++++++ cpp/include/raft/spatial/knn/ball_cover.hpp | 9 + .../raft/spatial/knn/ball_cover_common.h | 2 +- .../knn/detail/ann_quantized_faiss.cuh | 4 +- .../raft/spatial/knn/detail/ball_cover.cuh | 6 +- .../raft/spatial/knn/detail/common_faiss.h | 2 +- .../knn/detail/epsilon_neighborhood.cuh | 2 +- .../raft/spatial/knn/detail/fused_l2_knn.cuh | 4 +- .../spatial/knn/detail/haversine_distance.cuh | 2 +- .../knn/detail/knn_brute_force_faiss.cuh | 2 +- .../raft/spatial/knn/detail/processing.hpp | 12 +- .../raft/spatial/knn/epsilon_neighborhood.cuh | 64 +++ .../raft/spatial/knn/epsilon_neighborhood.hpp | 9 + cpp/include/raft/spatial/knn/knn.cuh | 162 ++++++ cpp/include/raft/spatial/knn/knn.hpp | 11 +- .../raft/spatial/knn/specializations.cuh | 26 + .../raft/spatial/knn/specializations.hpp | 17 +- .../{ball_cover.hpp => ball_cover.cuh} | 4 +- .../{fused_l2_knn.hpp => fused_l2_knn.cuh} | 2 +- .../knn/specializations/{knn.hpp => knn.cuh} | 4 +- cpp/include/raft/spectral/cluster_solvers.cuh | 84 ++++ cpp/include/raft/spectral/cluster_solvers.hpp | 15 +- cpp/include/raft/spectral/detail/lapack.hpp | 2 +- ...atrix_wrappers.cuh => matrix_wrappers.hpp} | 2 +- .../detail/modularity_maximization.hpp | 6 +- .../raft/spectral/detail/partition.hpp | 6 +- .../raft/spectral/detail/spectral_util.cuh | 2 +- cpp/include/raft/spectral/eigen_solvers.cuh | 107 ++++ cpp/include/raft/spectral/eigen_solvers.hpp | 14 +- cpp/include/raft/spectral/matrix_wrappers.hpp | 4 +- .../raft/spectral/modularity_maximization.cuh | 92 ++++ .../raft/spectral/modularity_maximization.hpp | 11 +- 
cpp/include/raft/spectral/partition.cuh | 102 ++++ cpp/include/raft/spectral/partition.hpp | 12 +- cpp/include/raft/stats/accuracy.cuh | 45 ++ cpp/include/raft/stats/accuracy.hpp | 9 + .../raft/stats/adjusted_rand_index.cuh | 54 ++ .../raft/stats/adjusted_rand_index.hpp | 10 + cpp/include/raft/stats/completeness_score.cuh | 52 ++ cpp/include/raft/stats/completeness_score.hpp | 11 +- cpp/include/raft/stats/contingency_matrix.cuh | 106 ++++ cpp/include/raft/stats/contingency_matrix.hpp | 9 + cpp/include/raft/stats/cov.cuh | 63 +++ cpp/include/raft/stats/cov.hpp | 9 + .../raft/stats/detail/adjusted_rand_index.cuh | 6 +- .../detail/batched/information_criterion.cuh | 2 +- .../raft/stats/detail/completeness_score.cuh | 6 +- cpp/include/raft/stats/detail/cov.cuh | 4 +- cpp/include/raft/stats/detail/dispersion.cuh | 2 +- cpp/include/raft/stats/detail/entropy.cuh | 4 +- .../raft/stats/detail/homogeneity_score.cuh | 4 +- .../raft/stats/detail/kl_divergence.cuh | 2 +- cpp/include/raft/stats/detail/mean.cuh | 4 +- cpp/include/raft/stats/detail/mean_center.cuh | 2 +- cpp/include/raft/stats/detail/meanvar.cuh | 2 +- .../raft/stats/detail/mutual_info_score.cuh | 4 +- cpp/include/raft/stats/detail/scores.cuh | 10 +- .../raft/stats/detail/silhouette_score.cuh | 12 +- cpp/include/raft/stats/detail/stddev.cuh | 4 +- cpp/include/raft/stats/detail/sum.cuh | 4 +- .../stats/detail/trustworthiness_score.cuh | 6 +- cpp/include/raft/stats/detail/v_measure.cuh | 4 +- .../raft/stats/detail/weighted_mean.cuh | 4 +- cpp/include/raft/stats/dispersion.cuh | 61 +++ cpp/include/raft/stats/dispersion.hpp | 9 + cpp/include/raft/stats/entropy.cuh | 50 ++ cpp/include/raft/stats/entropy.hpp | 9 + cpp/include/raft/stats/histogram.cuh | 67 +++ cpp/include/raft/stats/histogram.hpp | 9 + cpp/include/raft/stats/homogeneity_score.cuh | 53 ++ cpp/include/raft/stats/homogeneity_score.hpp | 10 + .../raft/stats/information_criterion.cuh | 68 +++ .../raft/stats/information_criterion.hpp | 10 + cpp/include/raft/stats/kl_divergence.cuh | 47 ++ cpp/include/raft/stats/kl_divergence.hpp | 10 + cpp/include/raft/stats/mean.cuh | 56 +++ cpp/include/raft/stats/mean.hpp | 11 +- cpp/include/raft/stats/mean_center.cuh | 84 ++++ cpp/include/raft/stats/mean_center.hpp | 11 +- cpp/include/raft/stats/meanvar.cuh | 60 +++ cpp/include/raft/stats/meanvar.hpp | 9 + cpp/include/raft/stats/minmax.cuh | 73 +++ cpp/include/raft/stats/minmax.hpp | 9 + cpp/include/raft/stats/mutual_info_score.cuh | 52 ++ cpp/include/raft/stats/mutual_info_score.hpp | 10 + cpp/include/raft/stats/r2_score.cuh | 51 ++ cpp/include/raft/stats/r2_score.hpp | 9 + cpp/include/raft/stats/rand_index.cuh | 43 ++ cpp/include/raft/stats/rand_index.hpp | 9 + cpp/include/raft/stats/regression_metrics.cuh | 55 ++ cpp/include/raft/stats/regression_metrics.hpp | 9 + cpp/include/raft/stats/silhouette_score.cuh | 79 +++ cpp/include/raft/stats/silhouette_score.hpp | 9 + cpp/include/raft/stats/specializations.cuh | 24 + cpp/include/raft/stats/specializations.hpp | 13 +- cpp/include/raft/stats/stddev.cuh | 93 ++++ cpp/include/raft/stats/stddev.hpp | 11 +- cpp/include/raft/stats/sum.cuh | 52 ++ cpp/include/raft/stats/sum.hpp | 11 +- .../raft/stats/trustworthiness_score.cuh | 54 ++ .../raft/stats/trustworthiness_score.hpp | 9 + cpp/include/raft/stats/v_measure.cuh | 53 ++ cpp/include/raft/stats/v_measure.hpp | 12 +- cpp/include/raft/stats/weighted_mean.cuh | 65 +++ cpp/include/raft/stats/weighted_mean.hpp | 9 + ...jensen_shannon_double_double_double_int.cu | 2 +- 
.../jensen_shannon_float_float_float_int.cu | 2 +- ...jensen_shannon_float_float_float_uint32.cu | 2 +- .../kl_divergence_double_double_double_int.cu | 2 +- .../kl_divergence_float_float_float_int.cu | 2 +- .../kl_divergence_float_float_float_uint32.cu | 2 +- .../detail/l1_double_double_double_int.cu | 2 +- .../detail/l1_float_float_float_int.cu | 2 +- .../detail/l1_float_float_float_uint32.cu | 2 +- .../l2_expanded_double_double_double_int.cu | 2 +- .../l2_expanded_float_float_float_int.cu | 2 +- .../l2_expanded_float_float_float_uint32.cu | 2 +- ..._sqrt_expanded_double_double_double_int.cu | 2 +- .../l2_sqrt_expanded_float_float_float_int.cu | 2 +- ..._sqrt_expanded_float_float_float_uint32.cu | 2 +- ...qrt_unexpanded_double_double_double_int.cu | 2 +- ...2_sqrt_unexpanded_float_float_float_int.cu | 2 +- ...qrt_unexpanded_float_float_float_uint32.cu | 2 +- .../l2_unexpanded_double_double_double_int.cu | 2 +- .../l2_unexpanded_float_float_float_int.cu | 2 +- .../l2_unexpanded_float_float_float_uint32.cu | 2 +- .../lp_unexpanded_double_double_double_int.cu | 2 +- .../lp_unexpanded_float_float_float_int.cu | 2 +- .../lp_unexpanded_float_float_float_uint32.cu | 2 +- cpp/src/nn/specializations/ball_cover.cu | 10 +- cpp/src/nn/specializations/knn.cu | 4 +- cpp/test/CMakeLists.txt | 2 +- cpp/test/cluster_solvers.cu | 6 +- cpp/test/distance/dist_adj.cu | 6 +- cpp/test/distance/distance_base.cuh | 6 +- cpp/test/distance/fused_l2_nn.cu | 8 +- cpp/test/eigen_solvers.cu | 6 +- cpp/test/handle.cpp | 2 +- cpp/test/label/label.cu | 4 +- cpp/test/label/merge_labels.cu | 4 +- cpp/test/lap/lap.cu | 4 +- cpp/test/linalg/add.cu | 6 +- cpp/test/linalg/add.cuh | 4 +- cpp/test/linalg/binary_op.cu | 6 +- cpp/test/linalg/binary_op.cuh | 4 +- cpp/test/linalg/cholesky_r1.cu | 4 +- cpp/test/linalg/coalesced_reduction.cu | 6 +- cpp/test/linalg/divide.cu | 6 +- cpp/test/linalg/eig.cu | 6 +- cpp/test/linalg/eig_sel.cu | 2 +- cpp/test/linalg/eltwise.cu | 6 +- cpp/test/linalg/gemm_layout.cu | 6 +- cpp/test/linalg/gemv.cu | 6 +- cpp/test/linalg/map.cu | 8 +- cpp/test/linalg/map_then_reduce.cu | 6 +- cpp/test/linalg/matrix_vector_op.cu | 4 +- cpp/test/linalg/matrix_vector_op.cuh | 4 +- cpp/test/linalg/multiply.cu | 6 +- cpp/test/linalg/norm.cu | 6 +- cpp/test/linalg/power.cu | 2 +- cpp/test/linalg/reduce.cu | 6 +- cpp/test/linalg/reduce.cuh | 4 +- cpp/test/linalg/reduce_cols_by_key.cu | 2 +- cpp/test/linalg/reduce_rows_by_key.cu | 2 +- cpp/test/linalg/rsvd.cu | 2 +- cpp/test/linalg/sqrt.cu | 2 +- cpp/test/linalg/strided_reduction.cu | 6 +- cpp/test/linalg/subtract.cu | 6 +- cpp/test/linalg/svd.cu | 8 +- cpp/test/linalg/ternary_op.cu | 2 +- cpp/test/linalg/transpose.cu | 6 +- cpp/test/linalg/unary_op.cu | 6 +- cpp/test/linalg/unary_op.cuh | 4 +- cpp/test/matrix/columnSort.cu | 2 +- cpp/test/matrix/linewise_op.cu | 8 +- cpp/test/matrix/math.cu | 6 +- cpp/test/matrix/matrix.cu | 6 +- cpp/test/mr/device/buffer.cpp | 2 +- cpp/test/mr/host/buffer.cpp | 2 +- cpp/test/random/make_blobs.cu | 2 +- cpp/test/random/make_regression.cu | 6 +- cpp/test/random/multi_variable_gaussian.cu | 2 +- cpp/test/random/permute.cu | 4 +- cpp/test/random/rng.cu | 6 +- cpp/test/random/rng_int.cu | 2 +- cpp/test/random/sample_without_replacement.cu | 2 +- cpp/test/sparse/add.cu | 6 +- cpp/test/sparse/connect_components.cu | 12 +- cpp/test/sparse/convert_coo.cu | 6 +- cpp/test/sparse/convert_csr.cu | 6 +- cpp/test/sparse/csr_row_slice.cu | 4 +- cpp/test/sparse/csr_to_dense.cu | 4 +- cpp/test/sparse/csr_transpose.cu | 4 +- cpp/test/sparse/degree.cu | 6 
+- cpp/test/sparse/dist_coo_spmv.cu | 6 +- cpp/test/sparse/distance.cu | 4 +- cpp/test/sparse/filter.cu | 8 +- cpp/test/sparse/knn.cu | 4 +- cpp/test/sparse/knn_graph.cu | 8 +- cpp/test/sparse/linkage.cu | 6 +- cpp/test/sparse/norm.cu | 6 +- cpp/test/sparse/reduce.cu | 4 +- cpp/test/sparse/row_op.cu | 6 +- cpp/test/sparse/sort.cu | 6 +- cpp/test/sparse/symmetrize.cu | 8 +- cpp/test/spatial/ball_cover.cu | 2 +- cpp/test/spatial/epsilon_neighborhood.cu | 4 +- cpp/test/spatial/faiss_mr.cu | 4 +- cpp/test/spatial/fused_l2_knn.cu | 6 +- cpp/test/spatial/haversine.cu | 2 +- cpp/test/spatial/knn.cu | 6 +- cpp/test/spatial/selection.cu | 6 +- cpp/test/spatial/spatial_data.h | 58 ++- cpp/test/spectral_matrix.cu | 2 +- cpp/test/stats/adjusted_rand_index.cu | 4 +- cpp/test/stats/completeness_score.cu | 6 +- cpp/test/stats/contingencyMatrix.cu | 2 +- cpp/test/stats/cov.cu | 6 +- cpp/test/stats/dispersion.cu | 4 +- cpp/test/stats/entropy.cu | 2 +- cpp/test/stats/histogram.cu | 4 +- cpp/test/stats/homogeneity_score.cu | 4 +- cpp/test/stats/information_criterion.cu | 2 +- cpp/test/stats/kl_divergence.cu | 2 +- cpp/test/stats/mean.cu | 6 +- cpp/test/stats/mean_center.cu | 8 +- cpp/test/stats/meanvar.cu | 6 +- cpp/test/stats/minmax.cu | 4 +- cpp/test/stats/mutual_info_score.cu | 4 +- cpp/test/stats/rand_index.cu | 2 +- cpp/test/stats/silhouette_score.cu | 4 +- cpp/test/stats/stddev.cu | 10 +- cpp/test/stats/sum.cu | 8 +- cpp/test/stats/trustworthiness.cu | 6 +- cpp/test/stats/v_measure.cu | 4 +- cpp/test/stats/weighted_mean.cu | 4 +- python/raft/dask/common/comms_utils.pyx | 2 +- python/raft/dask/common/nccl.pyx | 2 +- 471 files changed, 11388 insertions(+), 646 deletions(-) rename cpp/include/raft/cluster/{kmeans.hpp => kmeans.cuh} (98%) create mode 100644 cpp/include/raft/distance/distance.cuh create mode 100644 cpp/include/raft/distance/fused_l2_nn.cuh create mode 100644 cpp/include/raft/distance/specializations.cuh rename cpp/include/raft/distance/specializations/detail/{canberra.hpp => canberra.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{chebyshev.hpp => chebyshev.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{correlation.hpp => correlation.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{cosine.hpp => cosine.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{hamming_unexpanded.hpp => hamming_unexpanded.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{hellinger_expanded.hpp => hellinger_expanded.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{jensen_shannon.hpp => jensen_shannon.cuh} (98%) rename cpp/include/raft/distance/specializations/detail/{kl_divergence.hpp => kl_divergence.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{l1.hpp => l1.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{l2_expanded.hpp => l2_expanded.cuh} (98%) rename cpp/include/raft/distance/specializations/detail/{l2_sqrt_expanded.hpp => l2_sqrt_expanded.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{l2_sqrt_unexpanded.hpp => l2_sqrt_unexpanded.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{l2_unexpanded.hpp => l2_unexpanded.cuh} (97%) rename cpp/include/raft/distance/specializations/detail/{lp_unexpanded.hpp => lp_unexpanded.cuh} (97%) rename cpp/include/raft/distance/specializations/{distance.hpp => distance.cuh} (54%) create mode 100644 cpp/include/raft/label/classlabels.cuh create mode 100644 
cpp/include/raft/label/merge_labels.cuh rename cpp/include/raft/lap/{lap.hpp => lap.cuh} (99%) create mode 100644 cpp/include/raft/linalg/add.cuh create mode 100644 cpp/include/raft/linalg/axpy.cuh create mode 100644 cpp/include/raft/linalg/binary_op.cuh create mode 100644 cpp/include/raft/linalg/cholesky_r1_update.cuh create mode 100644 cpp/include/raft/linalg/coalesced_reduction.cuh create mode 100644 cpp/include/raft/linalg/contractions.cuh create mode 100644 cpp/include/raft/linalg/cublas_macros.h create mode 100644 cpp/include/raft/linalg/cusolver_macros.h rename cpp/include/raft/linalg/detail/{axpy.hpp => axpy.cuh} (100%) rename cpp/include/raft/linalg/detail/{cholesky_r1_update.hpp => cholesky_r1_update.cuh} (98%) rename cpp/include/raft/linalg/detail/{divide.hpp => divide.cuh} (96%) rename cpp/include/raft/linalg/detail/{eig.hpp => eig.cuh} (99%) rename cpp/include/raft/linalg/detail/{eltwise.hpp => eltwise.cuh} (97%) rename cpp/include/raft/linalg/detail/{lanczos.hpp => lanczos.cuh} (100%) rename cpp/include/raft/linalg/detail/{lstsq.hpp => lstsq.cuh} (98%) rename cpp/include/raft/linalg/detail/{mean_squared_error.hpp => mean_squared_error.cuh} (96%) rename cpp/include/raft/linalg/detail/{multiply.hpp => multiply.cuh} (96%) rename cpp/include/raft/linalg/detail/{norm.hpp => norm.cuh} (99%) rename cpp/include/raft/linalg/detail/{reduce.hpp => reduce.cuh} (95%) rename cpp/include/raft/linalg/detail/{svd.hpp => svd.cuh} (98%) rename cpp/include/raft/linalg/detail/{transpose.hpp => transpose.cuh} (100%) create mode 100644 cpp/include/raft/linalg/divide.cuh create mode 100644 cpp/include/raft/linalg/eig.cuh create mode 100644 cpp/include/raft/linalg/eltwise.cuh create mode 100644 cpp/include/raft/linalg/gemm.cuh create mode 100644 cpp/include/raft/linalg/gemv.cuh create mode 100644 cpp/include/raft/linalg/init.cuh create mode 100644 cpp/include/raft/linalg/lanczos.cuh create mode 100644 cpp/include/raft/linalg/lstsq.cuh create mode 100644 cpp/include/raft/linalg/map.cuh create mode 100644 cpp/include/raft/linalg/map_then_reduce.cuh create mode 100644 cpp/include/raft/linalg/matrix_vector_op.cuh create mode 100644 cpp/include/raft/linalg/mean_squared_error.cuh create mode 100644 cpp/include/raft/linalg/multiply.cuh create mode 100644 cpp/include/raft/linalg/norm.cuh create mode 100644 cpp/include/raft/linalg/power.hpp create mode 100644 cpp/include/raft/linalg/qr.cuh create mode 100644 cpp/include/raft/linalg/reduce.cuh create mode 100644 cpp/include/raft/linalg/reduce_cols_by_key.hpp create mode 100644 cpp/include/raft/linalg/reduce_rows_by_key.hpp create mode 100644 cpp/include/raft/linalg/rsvd.hpp create mode 100644 cpp/include/raft/linalg/sqrt.hpp create mode 100644 cpp/include/raft/linalg/strided_reduction.cuh create mode 100644 cpp/include/raft/linalg/subtract.cuh create mode 100644 cpp/include/raft/linalg/svd.cuh create mode 100644 cpp/include/raft/linalg/ternary_op.hpp create mode 100644 cpp/include/raft/linalg/transpose.cuh create mode 100644 cpp/include/raft/linalg/unary_op.cuh create mode 100644 cpp/include/raft/matrix/col_wise_sort.cuh create mode 100644 cpp/include/raft/matrix/math.cuh create mode 100644 cpp/include/raft/matrix/matrix.cuh create mode 100644 cpp/include/raft/random/make_blobs.cuh create mode 100644 cpp/include/raft/random/make_regression.cuh create mode 100644 cpp/include/raft/random/multi_variable_gaussian.cuh create mode 100644 cpp/include/raft/random/permute.cuh create mode 100644 cpp/include/raft/random/rng.cuh create mode 100644 
cpp/include/raft/sparse/convert/coo.cuh create mode 100644 cpp/include/raft/sparse/convert/csr.cuh create mode 100644 cpp/include/raft/sparse/convert/dense.cuh create mode 100644 cpp/include/raft/sparse/distance/distance.cuh rename cpp/include/raft/sparse/hierarchy/detail/{single_linkage.hpp => single_linkage.cuh} (99%) create mode 100644 cpp/include/raft/sparse/hierarchy/single_linkage.cuh create mode 100644 cpp/include/raft/sparse/linalg/add.cuh create mode 100644 cpp/include/raft/sparse/linalg/degree.cuh create mode 100644 cpp/include/raft/sparse/linalg/norm.cuh create mode 100644 cpp/include/raft/sparse/linalg/spectral.cuh create mode 100644 cpp/include/raft/sparse/linalg/symmetrize.cuh create mode 100644 cpp/include/raft/sparse/linalg/transpose.cuh create mode 100644 cpp/include/raft/sparse/mst/mst.hpp rename cpp/include/raft/sparse/op/detail/{slice.h => slice.cuh} (97%) create mode 100644 cpp/include/raft/sparse/op/filter.cuh create mode 100644 cpp/include/raft/sparse/op/reduce.cuh create mode 100644 cpp/include/raft/sparse/op/row_op.cuh create mode 100644 cpp/include/raft/sparse/op/slice.cuh create mode 100644 cpp/include/raft/sparse/op/sort.cuh create mode 100644 cpp/include/raft/sparse/selection/connect_components.cuh create mode 100644 cpp/include/raft/sparse/selection/knn.cuh create mode 100644 cpp/include/raft/sparse/selection/knn_graph.cuh create mode 100644 cpp/include/raft/spatial/knn/ann.cuh create mode 100644 cpp/include/raft/spatial/knn/ball_cover.cuh create mode 100644 cpp/include/raft/spatial/knn/epsilon_neighborhood.cuh create mode 100644 cpp/include/raft/spatial/knn/knn.cuh create mode 100644 cpp/include/raft/spatial/knn/specializations.cuh rename cpp/include/raft/spatial/knn/specializations/{ball_cover.hpp => ball_cover.cuh} (95%) rename cpp/include/raft/spatial/knn/specializations/{fused_l2_knn.hpp => fused_l2_knn.cuh} (98%) rename cpp/include/raft/spatial/knn/specializations/{knn.hpp => knn.cuh} (97%) create mode 100644 cpp/include/raft/spectral/cluster_solvers.cuh rename cpp/include/raft/spectral/detail/{matrix_wrappers.cuh => matrix_wrappers.hpp} (99%) create mode 100644 cpp/include/raft/spectral/eigen_solvers.cuh create mode 100644 cpp/include/raft/spectral/modularity_maximization.cuh create mode 100644 cpp/include/raft/spectral/partition.cuh create mode 100644 cpp/include/raft/stats/accuracy.cuh create mode 100644 cpp/include/raft/stats/adjusted_rand_index.cuh create mode 100644 cpp/include/raft/stats/completeness_score.cuh create mode 100644 cpp/include/raft/stats/contingency_matrix.cuh create mode 100644 cpp/include/raft/stats/cov.cuh create mode 100644 cpp/include/raft/stats/dispersion.cuh create mode 100644 cpp/include/raft/stats/entropy.cuh create mode 100644 cpp/include/raft/stats/histogram.cuh create mode 100644 cpp/include/raft/stats/homogeneity_score.cuh create mode 100644 cpp/include/raft/stats/information_criterion.cuh create mode 100644 cpp/include/raft/stats/kl_divergence.cuh create mode 100644 cpp/include/raft/stats/mean.cuh create mode 100644 cpp/include/raft/stats/mean_center.cuh create mode 100644 cpp/include/raft/stats/meanvar.cuh create mode 100644 cpp/include/raft/stats/minmax.cuh create mode 100644 cpp/include/raft/stats/mutual_info_score.cuh create mode 100644 cpp/include/raft/stats/r2_score.cuh create mode 100644 cpp/include/raft/stats/rand_index.cuh create mode 100644 cpp/include/raft/stats/regression_metrics.cuh create mode 100644 cpp/include/raft/stats/silhouette_score.cuh create mode 100644 cpp/include/raft/stats/specializations.cuh 
create mode 100644 cpp/include/raft/stats/stddev.cuh create mode 100644 cpp/include/raft/stats/sum.cuh create mode 100644 cpp/include/raft/stats/trustworthiness_score.cuh create mode 100644 cpp/include/raft/stats/v_measure.cuh create mode 100644 cpp/include/raft/stats/weighted_mean.cuh diff --git a/build.sh b/build.sh index 9a3295321f..9d3a796c65 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # cuml build script diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index a832f67aaf..83521e5d11 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. ######################## # RAFT Version Updater # ######################## diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 8c29d2b321..51ed34754b 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 51e4037c60..f3777405c0 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/cluster/kmeans.hpp b/cpp/include/raft/cluster/kmeans.cuh similarity index 98% rename from cpp/include/raft/cluster/kmeans.hpp rename to cpp/include/raft/cluster/kmeans.cuh index ab0fbb04c7..28d4ae0719 100644 --- a/cpp/include/raft/cluster/kmeans.hpp +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/comms/comms.hpp b/cpp/include/raft/comms/comms.hpp index 14c33c6cf2..05678a7e49 100644 --- a/cpp/include/raft/comms/comms.hpp +++ b/cpp/include/raft/comms/comms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/comms/comms_test.hpp b/cpp/include/raft/comms/comms_test.hpp index 1acb72bc85..f01060cb40 100644 --- a/cpp/include/raft/comms/comms_test.hpp +++ b/cpp/include/raft/comms/comms_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/comms/detail/ucp_helper.hpp b/cpp/include/raft/comms/detail/ucp_helper.hpp index 6ba66fb6f3..ef93ae90c5 100644 --- a/cpp/include/raft/comms/detail/ucp_helper.hpp +++ b/cpp/include/raft/comms/detail/ucp_helper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/comms/helper.hpp b/cpp/include/raft/comms/helper.hpp index d83e8f4d4f..b1aae86556 100644 --- a/cpp/include/raft/comms/helper.hpp +++ b/cpp/include/raft/comms/helper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/comms/mpi_comms.hpp b/cpp/include/raft/comms/mpi_comms.hpp index 3fab04c441..ca5275cd06 100644 --- a/cpp/include/raft/comms/mpi_comms.hpp +++ b/cpp/include/raft/comms/mpi_comms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 6fa0f7e37b..7604606ba1 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/correlation.cuh b/cpp/include/raft/distance/detail/correlation.cuh index 21d04f3f8d..c88d5afeab 100644 --- a/cpp/include/raft/distance/detail/correlation.cuh +++ b/cpp/include/raft/distance/detail/correlation.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include #include -#include +#include namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/cosine.cuh b/cpp/include/raft/distance/detail/cosine.cuh index bead5f1f71..b7eed3e2a8 100644 --- a/cpp/include/raft/distance/detail/cosine.cuh +++ b/cpp/include/raft/distance/detail/cosine.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include -#include +#include namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/distance.cuh b/cpp/include/raft/distance/detail/distance.cuh index 45850de115..4782afe46e 100644 --- a/cpp/include/raft/distance/detail/distance.cuh +++ b/cpp/include/raft/distance/detail/distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
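Before the distance header diffs that follow, a minimal sketch of the shared-guard scheme described in the commit message may help. The `filea.hpp`/`filea.cuh` names and the `__FILEA_H` macro are hypothetical stand-ins from the commit message; the real headers in this patch use guards such as `__DISTANCE_H`, as the new distance.cuh below shows:

```cpp
// filea.cuh -- the new, CUDA-aware public header
#ifndef __FILEA_H
#define __FILEA_H

#pragma once

// ... declarations that require a CUDA-enabled compiler ...

#endif

// filea.hpp -- deprecated copy, kept so existing includes still compile
#ifndef __FILEA_H
#define __FILEA_H

#pragma once

// ... identical declarations ...

#endif
```

Because both headers define the same `__FILEA_H` guard, whichever one a translation unit includes first wins and the other becomes a no-op, so including the deprecated `hpp` alongside the new `cuh` never produces redefinition errors during the deprecation window.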
diff --git a/cpp/include/raft/distance/detail/euclidean.cuh b/cpp/include/raft/distance/detail/euclidean.cuh index 4786f584c4..d83e81b6a9 100644 --- a/cpp/include/raft/distance/detail/euclidean.cuh +++ b/cpp/include/raft/distance/detail/euclidean.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once #include -#include +#include namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 80eb6021ef..81d02c410c 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include namespace raft { diff --git a/cpp/include/raft/distance/detail/hellinger.cuh b/cpp/include/raft/distance/detail/hellinger.cuh index 3cb0469803..31854fd1d6 100644 --- a/cpp/include/raft/distance/detail/hellinger.cuh +++ b/cpp/include/raft/distance/detail/hellinger.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once #include -#include +#include namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index 996cc544a6..9d203c0c4f 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -16,8 +16,8 @@ #pragma once #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/include/raft/distance/distance.cuh b/cpp/include/raft/distance/distance.cuh new file mode 100644 index 0000000000..71c9e8d32b --- /dev/null +++ b/cpp/include/raft/distance/distance.cuh @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __DISTANCE_H +#define __DISTANCE_H + +#pragma once + +#include +#include +#include +#include + +namespace raft { +namespace distance { + +/** + * @brief Evaluate pairwise distances with the user epilogue lambda allowed + * @tparam DistanceType which distance to evaluate + * @tparam InType input argument type + * @tparam AccType accumulation type + * @tparam OutType output type + * @tparam FinalLambda user-defined epilogue lambda + * @tparam Index_ Index type + * @param x first set of points + * @param y second set of points + * @param dist output distance matrix + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * @param workspace temporary workspace needed for computations + * @param worksize number of bytes of the workspace + * @param fin_op the final gemm epilogue lambda + * @param stream cuda stream + * @param isRowMajor whether the matrices are row-major or col-major + * @param metric_arg metric argument (used for Minkowski distance) + * + * @note fin_op: This is a device lambda which is supposed to operate upon the + * input which is AccType and returns the output in OutType. Its signature is + * as follows: <pre>OutType fin_op(AccType in, int g_idx);</pre>
. If one needs + * any other parameters, feel free to pass them via closure. + */ +template +void distance(const InType* x, + const InType* y, + OutType* dist, + Index_ m, + Index_ n, + Index_ k, + void* workspace, + size_t worksize, + FinalLambda fin_op, + cudaStream_t stream, + bool isRowMajor = true, + InType metric_arg = 2.0f) +{ + detail::distance( + x, y, dist, m, n, k, workspace, worksize, fin_op, stream, isRowMajor, metric_arg); +} + +/** + * @brief Evaluate pairwise distances for the simple use case + * @tparam DistanceType which distance to evaluate + * @tparam InType input argument type + * @tparam AccType accumulation type + * @tparam OutType output type + * @tparam Index_ Index type + * @param x first set of points + * @param y second set of points + * @param dist output distance matrix + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * @param workspace temporary workspace needed for computations + * @param worksize number of bytes of the workspace + * @param stream cuda stream + * @param isRowMajor whether the matrices are row-major or col-major + * @param metric_arg metric argument (used for Minkowski distance) + * + * @note if workspace is passed as nullptr, this will return in + * worksize, the number of bytes of workspace required + */ +template +void distance(const InType* x, + const InType* y, + OutType* dist, + Index_ m, + Index_ n, + Index_ k, + void* workspace, + size_t worksize, + cudaStream_t stream, + bool isRowMajor = true, + InType metric_arg = 2.0f) +{ + detail::distance( + x, y, dist, m, n, k, workspace, worksize, stream, isRowMajor, metric_arg); +} + +/** + * @brief Return the exact workspace size to compute the distance + * @tparam DistanceType which distance to evaluate + * @tparam InType input argument type + * @tparam AccType accumulation type + * @tparam OutType output type + * @tparam Index_ Index type + * @param x first set of points + * @param y second set of points + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * + * @note If the specified distanceType doesn't need the workspace at all, it + * returns 0. 
+ */ +template +size_t getWorkspaceSize(const InType* x, const InType* y, Index_ m, Index_ n, Index_ k) +{ + return detail::getWorkspaceSize(x, y, m, n, k); +} + +/** + * @brief Evaluate pairwise distances for the simple use case + * @tparam DistanceType which distance to evaluate + * @tparam InType input argument type + * @tparam AccType accumulation type + * @tparam OutType output type + * @tparam Index_ Index type + * @param x first set of points + * @param y second set of points + * @param dist output distance matrix + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * @param stream cuda stream + * @param isRowMajor whether the matrices are row-major or col-major + * @param metric_arg metric argument (used for Minkowski distance) + * + * @note if workspace is passed as nullptr, this will return in + * worksize, the number of bytes of workspace required + */ +template +void distance(const InType* x, + const InType* y, + OutType* dist, + Index_ m, + Index_ n, + Index_ k, + cudaStream_t stream, + bool isRowMajor = true, + InType metric_arg = 2.0f) +{ + rmm::device_uvector workspace(0, stream); + auto worksize = getWorkspaceSize(x, y, m, n, k); + workspace.resize(worksize, stream); + detail::distance( + x, y, dist, m, n, k, workspace.data(), worksize, stream, isRowMajor, metric_arg); +} + +/** + * @defgroup pairwise_distance pairwise distance prims + * @{ + * @brief Convenience wrapper around 'distance' prim to convert runtime metric + * into compile time for the purpose of dispatch + * @tparam Type input/accumulation/output data-type + * @tparam Index_ indexing type + * @param x first set of points + * @param y second set of points + * @param dist output distance matrix + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * @param workspace temporary workspace buffer which can get resized as per the + * needed workspace size + * @param metric distance metric + * @param stream cuda stream + * @param isRowMajor whether the matrices are row-major or col-major + */ +template +void pairwise_distance(const raft::handle_t& handle, + const Type* x, + const Type* y, + Type* dist, + Index_ m, + Index_ n, + Index_ k, + rmm::device_uvector& workspace, + raft::distance::DistanceType metric, + bool isRowMajor = true, + Type metric_arg = 2.0f) +{ + switch (metric) { + case raft::distance::DistanceType::L2Expanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::L2SqrtExpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::CosineExpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::L1: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::L2Unexpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::L2SqrtUnexpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::Linf: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case 
raft::distance::DistanceType::HellingerExpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::LpUnexpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor, metric_arg); + break; + case raft::distance::DistanceType::Canberra: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::HammingUnexpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::JensenShannon: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::RusselRaoExpanded: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::KLDivergence: + detail::pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + case raft::distance::DistanceType::CorrelationExpanded: + detail:: + pairwise_distance_impl( + x, y, dist, m, n, k, workspace, handle.get_stream(), isRowMajor); + break; + default: THROW("Unknown or unsupported distance metric '%d'!", (int)metric); + }; +} +/** @} */ + +/** + * @defgroup pairwise_distance pairwise distance prims + * @{ + * @brief Convenience wrapper around 'distance' prim to convert runtime metric + * into compile time for the purpose of dispatch + * @tparam Type input/accumulation/output data-type + * @tparam Index_ indexing type + * @param x first set of points + * @param y second set of points + * @param dist output distance matrix + * @param m number of points in x + * @param n number of points in y + * @param k dimensionality + * @param metric distance metric + * @param stream cuda stream + * @param isRowMajor whether the matrices are row-major or col-major + */ +template +void pairwise_distance(const raft::handle_t& handle, + const Type* x, + const Type* y, + Type* dist, + Index_ m, + Index_ n, + Index_ k, + raft::distance::DistanceType metric, + bool isRowMajor = true, + Type metric_arg = 2.0f) +{ + rmm::device_uvector workspace(0, handle.get_stream()); + pairwise_distance( + handle, x, y, dist, m, n, k, workspace, metric, isRowMajor, metric_arg); +} + +}; // namespace distance +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/distance.hpp b/cpp/include/raft/distance/distance.hpp index 935cf6677a..f9fbde50e4 100644 --- a/cpp/include/raft/distance/distance.hpp +++ b/cpp/include/raft/distance/distance.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
+ */ + +#ifndef __DISTANCE_H +#define __DISTANCE_H #pragma once @@ -319,3 +326,5 @@ void pairwise_distance(const raft::handle_t& handle, }; // namespace distance }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/fused_l2_nn.cuh b/cpp/include/raft/distance/fused_l2_nn.cuh new file mode 100644 index 0000000000..ac8895c9ce --- /dev/null +++ b/cpp/include/raft/distance/fused_l2_nn.cuh @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FUSED_L2_NN_H +#define __FUSED_L2_NN_H + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace distance { + +template +using KVPMinReduce = detail::KVPMinReduceImpl; + +template +using MinAndDistanceReduceOp = detail::MinAndDistanceReduceOpImpl; + +template +using MinReduceOp = detail::MinReduceOpImpl; + +/** + * Initialize array using init value from reduction op + */ +template +void initialize(const raft::handle_t& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp) +{ + detail::initialize(min, m, maxVal, redOp, handle.get_stream()); +} + +/** + * @brief Fused L2 distance and 1-nearest-neighbor computation in a single call. + * + * The benefits of such a call are 2-fold: 1) eliminate the need for an + * intermediate buffer to store the output of gemm 2) reduce the memory read + * traffic on this intermediate buffer, otherwise needed during the reduction + * phase for 1-NN. + * + * @tparam DataT data type + * @tparam OutT output type to either store 1-NN indices and their minimum + * distances or store only the min distances. Accordingly, one + * has to pass an appropriate `ReduceOpT` + * @tparam IdxT indexing arithmetic type + * @tparam ReduceOpT A struct to perform the final needed reduction operation + * and also to initialize the output array elements with the + * appropriate initial value needed for reduction. + * + * @param[out] min will contain the reduced output (Length = `m`) + * (on device) + * @param[in] x first matrix. Row major. Dim = `m x k`. + * (on device). + * @param[in] y second matrix. Row major. Dim = `n x k`. + * (on device). + * @param[in] xn L2 squared norm of `x`. Length = `m`. (on device). + * @param[in] yn L2 squared norm of `y`. Length = `n`. (on device) + * @param[in] m gemm m + * @param[in] n gemm n + * @param[in] k gemm k + * @param[in] workspace temp workspace. Size = sizeof(int)*m. 
(on device) + * @param[in] redOp reduction operator in the epilogue + * @param[in] pairRedOp reduction operation on key value pairs + * @param[in] sqrt Whether the output `minDist` should contain L2-sqrt + * @param[in] initOutBuffer whether to initialize the output buffer before the + * main kernel launch + * @param[in] stream cuda stream + */ +template +void fusedL2NN(OutT* min, + const DataT* x, + const DataT* y, + const DataT* xn, + const DataT* yn, + IdxT m, + IdxT n, + IdxT k, + void* workspace, + ReduceOpT redOp, + KVPReduceOpT pairRedOp, + bool sqrt, + bool initOutBuffer, + cudaStream_t stream) +{ + size_t bytes = sizeof(DataT) * k; + if (16 % sizeof(DataT) == 0 && bytes % 16 == 0) { + detail::fusedL2NNImpl( + min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); + } else if (8 % sizeof(DataT) == 0 && bytes % 8 == 0) { + detail::fusedL2NNImpl( + min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); + } else { + detail::fusedL2NNImpl( + min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); + } +} + +} // namespace distance +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/fused_l2_nn.hpp b/cpp/include/raft/distance/fused_l2_nn.hpp index b293f0c237..1cb3ee39eb 100644 --- a/cpp/include/raft/distance/fused_l2_nn.hpp +++ b/cpp/include/raft/distance/fused_l2_nn.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __FUSED_L2_NN_H +#define __FUSED_L2_NN_H #pragma once @@ -111,3 +118,5 @@ void fusedL2NN(OutT* min, } // namespace distance } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/specializations.cuh b/cpp/include/raft/distance/specializations.cuh new file mode 100644 index 0000000000..5944534be7 --- /dev/null +++ b/cpp/include/raft/distance/specializations.cuh @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DISTANCE_SPECIALIZATIONS_H +#define __DISTANCE_SPECIALIZATIONS_H + +#pragma once + +#include + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/specializations.hpp b/cpp/include/raft/distance/specializations.hpp index e70943e731..db426c30d2 100644 --- a/cpp/include/raft/distance/specializations.hpp +++ b/cpp/include/raft/distance/specializations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __DISTANCE_SPECIALIZATIONS_H +#define __DISTANCE_SPECIALIZATIONS_H #pragma once -#include \ No newline at end of file +#include + +#endif \ No newline at end of file diff --git a/cpp/include/raft/distance/specializations/detail/canberra.hpp b/cpp/include/raft/distance/specializations/detail/canberra.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/canberra.hpp rename to cpp/include/raft/distance/specializations/detail/canberra.cuh index 2e71685532..22bdf41fd1 100644 --- a/cpp/include/raft/distance/specializations/detail/canberra.hpp +++ b/cpp/include/raft/distance/specializations/detail/canberra.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/chebyshev.hpp b/cpp/include/raft/distance/specializations/detail/chebyshev.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/chebyshev.hpp rename to cpp/include/raft/distance/specializations/detail/chebyshev.cuh index dc03e047be..7502409082 100644 --- a/cpp/include/raft/distance/specializations/detail/chebyshev.hpp +++ b/cpp/include/raft/distance/specializations/detail/chebyshev.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/correlation.hpp b/cpp/include/raft/distance/specializations/detail/correlation.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/correlation.hpp rename to cpp/include/raft/distance/specializations/detail/correlation.cuh index 2e7683ab10..a2cddea179 100644 --- a/cpp/include/raft/distance/specializations/detail/correlation.hpp +++ b/cpp/include/raft/distance/specializations/detail/correlation.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/cosine.hpp b/cpp/include/raft/distance/specializations/detail/cosine.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/cosine.hpp rename to cpp/include/raft/distance/specializations/detail/cosine.cuh index b47d294645..c98703e135 100644 --- a/cpp/include/raft/distance/specializations/detail/cosine.hpp +++ b/cpp/include/raft/distance/specializations/detail/cosine.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
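The guard added above is what makes the hpp/cuh coexistence safe: the deprecated `specializations.hpp` and the new `specializations.cuh` share one `#ifndef __DISTANCE_SPECIALIZATIONS_H` symbol, so whichever file is included first wins and the other becomes a no-op. A condensed sketch of the pattern, using a hypothetical `foo` header:
@code{.cpp}
// foo.cuh and the deprecated foo.hpp (both hypothetical) carry the
// same guard symbol in addition to #pragma once:
#ifndef __FOO_H
#define __FOO_H
#pragma once
// ... identical contents ...
#endif

// Consequently this sequence compiles cleanly even though two distinct
// files are pulled in -- e.g. user code includes foo.hpp while raft
// internals already include foo.cuh:
#include <raft/foo.hpp>  // defines __FOO_H and emits the declarations
#include <raft/foo.cuh>  // guard already defined, contents skipped
@endcode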
diff --git a/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.hpp b/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/hamming_unexpanded.hpp rename to cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh index 29a4ca03d9..9cf7b9b343 100644 --- a/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/hellinger_expanded.hpp b/cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/hellinger_expanded.hpp rename to cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh index 264003ec0e..28ecaa1b65 100644 --- a/cpp/include/raft/distance/specializations/detail/hellinger_expanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/jensen_shannon.hpp b/cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh similarity index 98% rename from cpp/include/raft/distance/specializations/detail/jensen_shannon.hpp rename to cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh index 3135a4c579..ac0190562b 100644 --- a/cpp/include/raft/distance/specializations/detail/jensen_shannon.hpp +++ b/cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/kl_divergence.hpp b/cpp/include/raft/distance/specializations/detail/kl_divergence.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/kl_divergence.hpp rename to cpp/include/raft/distance/specializations/detail/kl_divergence.cuh index 207fca6bc2..b338cebdc2 100644 --- a/cpp/include/raft/distance/specializations/detail/kl_divergence.hpp +++ b/cpp/include/raft/distance/specializations/detail/kl_divergence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/distance/specializations/detail/l1.hpp b/cpp/include/raft/distance/specializations/detail/l1.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/l1.hpp rename to cpp/include/raft/distance/specializations/detail/l1.cuh index e8eddfe1e4..65979ce414 100644 --- a/cpp/include/raft/distance/specializations/detail/l1.hpp +++ b/cpp/include/raft/distance/specializations/detail/l1.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/l2_expanded.hpp b/cpp/include/raft/distance/specializations/detail/l2_expanded.cuh similarity index 98% rename from cpp/include/raft/distance/specializations/detail/l2_expanded.hpp rename to cpp/include/raft/distance/specializations/detail/l2_expanded.cuh index db37b8db9f..1dac34ad7a 100644 --- a/cpp/include/raft/distance/specializations/detail/l2_expanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/l2_expanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.hpp b/cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.hpp rename to cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.cuh index ac23c9c357..8b752d8235 100644 --- a/cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/l2_sqrt_expanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.hpp b/cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.hpp rename to cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.cuh index 1e38575fbf..8632fda769 100644 --- a/cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/l2_sqrt_unexpanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/distance/specializations/detail/l2_unexpanded.hpp b/cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/l2_unexpanded.hpp rename to cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh index 035c9ef693..3962cfd1ae 100644 --- a/cpp/include/raft/distance/specializations/detail/l2_unexpanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/detail/lp_unexpanded.hpp b/cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh similarity index 97% rename from cpp/include/raft/distance/specializations/detail/lp_unexpanded.hpp rename to cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh index 83eda5f07b..1f7e504ba8 100644 --- a/cpp/include/raft/distance/specializations/detail/lp_unexpanded.hpp +++ b/cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/specializations/distance.hpp b/cpp/include/raft/distance/specializations/distance.cuh similarity index 54% rename from cpp/include/raft/distance/specializations/distance.hpp rename to cpp/include/raft/distance/specializations/distance.cuh index a57d6f49a5..7553f87e39 100644 --- a/cpp/include/raft/distance/specializations/distance.hpp +++ b/cpp/include/raft/distance/specializations/distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,17 +16,17 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/cpp/include/raft/label/classlabels.cuh b/cpp/include/raft/label/classlabels.cuh new file mode 100644 index 0000000000..93c1080ff2 --- /dev/null +++ b/cpp/include/raft/label/classlabels.cuh @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CLASS_LABELS_H +#define __CLASS_LABELS_H + +#pragma once + +#include + +namespace raft { +namespace label { + +/** + * Get unique class labels. 
+ * + * The y array is assumed to store class labels. The unique values are selected + * from this array. + * + * @tparam value_t numeric type of the arrays with class labels + * @param [inout] unique output unique labels + * @param [in] y device array of labels, size [n] + * @param [in] n number of labels + * @param [in] stream cuda stream + * @returns unique device array of unique labels, unallocated on entry; + * on exit it has size equal to the returned number of unique labels + */ +template +int getUniquelabels(rmm::device_uvector& unique, value_t* y, size_t n, cudaStream_t stream) +{ + return detail::getUniquelabels(unique, y, n, stream); +} + +/** + * Assign one versus rest labels. + * + * The output labels will have values +/-1: + * y_out = (y == y_unique[idx]) ? +1 : -1; + * + * The output type currently is set to value_t, but for SVM in principle we are + * free to choose another type for y_out (it should represent +/-1, and it is used + * in floating point arithmetic). + * + * @param [in] y device array of input labels, size [n] + * @param [in] n number of labels + * @param [in] y_unique device array of unique labels, size [n_classes] + * @param [in] n_classes number of unique labels + * @param [out] y_out device array of output labels + * @param [in] idx index of unique label that should be labeled as 1 + * @param [in] stream cuda stream + */ +template +void getOvrlabels( + value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream) +{ + detail::getOvrlabels(y, n, y_unique, n_classes, y_out, idx, stream); +} +/** + * Maps an input array containing a series of numbers into a new array + * where numbers have been mapped to a monotonically increasing set + * of labels. This can be useful in machine learning algorithms, for instance, + * where a given set of labels is not taken from a monotonically increasing + * set. This can happen if they are filtered or if only a subset of the + * total labels are used in a dataset. This is also useful in graph algorithms + * where a set of vertices needs to be labeled in a monotonically increasing + * order. + * @tparam Type the numeric type of the input and output arrays + * @tparam Lambda the type of an optional filter function, which determines + * which items in the array to map. + * @param[out] out the output monotonic array + * @param[in] in input label array + * @param[in] N number of elements in the input array + * @param[in] stream cuda stream to use + * @param[in] filter_op an optional function for specifying which values + * should have monotonically increasing labels applied to them. + * @param[in] zero_based force monotonic set to start at 0? + */ +template +void make_monotonic( + Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false) +{ + detail::make_monotonic(out, in, N, stream, filter_op, zero_based); +} + +/** + * Maps an input array containing a series of numbers into a new array + * where numbers have been mapped to a monotonically increasing set + * of labels. This can be useful in machine learning algorithms, for instance, + * where a given set of labels is not taken from a monotonically increasing + * set. This can happen if they are filtered or if only a subset of the + * total labels are used in a dataset. This is also useful in graph algorithms + * where a set of vertices needs to be labeled in a monotonically increasing + * order.
+ * @tparam Type the numeric type of the input and output arrays + * @param[out] out output label array with labels assigned monotonically + * @param[in] in input label array + * @param[in] N number of elements in the input array + * @param[in] stream cuda stream to use + * @param[in] zero_based force monotonic label set to start at 0? + */ +template +void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false) +{ + detail::make_monotonic(out, in, N, stream, zero_based); +} +}; // namespace label +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/label/classlabels.hpp b/cpp/include/raft/label/classlabels.hpp index de9f60518d..189c26f69f 100644 --- a/cpp/include/raft/label/classlabels.hpp +++ b/cpp/include/raft/label/classlabels.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,9 @@ * limitations under the License. */ +#ifndef __CLASS_LABELS_H +#define __CLASS_LABELS_H + #pragma once #include @@ -115,3 +118,5 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer } }; // namespace label }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/label/detail/classlabels.cuh b/cpp/include/raft/label/detail/classlabels.cuh index 53657a5dfa..a941751d78 100644 --- a/cpp/include/raft/label/detail/classlabels.cuh +++ b/cpp/include/raft/label/detail/classlabels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/label/detail/merge_labels.cuh b/cpp/include/raft/label/detail/merge_labels.cuh index bf03d1c738..1f62b3f0d6 100644 --- a/cpp/include/raft/label/detail/merge_labels.cuh +++ b/cpp/include/raft/label/detail/merge_labels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #include #include -#include +#include namespace raft { namespace label { diff --git a/cpp/include/raft/label/merge_labels.cuh b/cpp/include/raft/label/merge_labels.cuh new file mode 100644 index 0000000000..2bf2fa830b --- /dev/null +++ b/cpp/include/raft/label/merge_labels.cuh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
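For orientation, a small sketch of how the `classlabels.cuh` entry points above fit together; the `int` instantiation, label values, and variable names are made up for illustration:
@code{.cpp}
#include <raft/label/classlabels.cuh>
#include <rmm/device_uvector.hpp>

// y: device array of n arbitrary class labels, e.g. {2, 7, 2, 5, 7, ...}
rmm::device_uvector<int> unique(0, stream);
int n_classes = raft::label::getUniquelabels(unique, y, n, stream);

// One-vs-rest encoding of class unique[0]: +1 where y matches, -1 elsewhere.
rmm::device_uvector<int> y_ovr(n, stream);
raft::label::getOvrlabels(y, (int)n, unique.data(), n_classes, y_ovr.data(), 0, stream);

// Remap e.g. {2, 5, 7} onto the monotonic, zero-based set {0, 1, 2}.
rmm::device_uvector<int> y_mono(n, stream);
raft::label::make_monotonic(y_mono.data(), y, n, stream, true);
@endcode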
+ */ + +#ifndef __MERGE_LABELS_H +#define __MERGE_LABELS_H + +#pragma once + +#include + +namespace raft { +namespace label { + +/** + * @brief Merge two labellings in-place, according to a core mask + * + * A labelling is a representation of disjoint sets (groups) where points that + * belong to the same group have the same label. It is assumed that group + * labels take values between 1 and N. labels relate to points, i.e a label i+1 + * means that you belong to the same group as the point i. + * The special value MAX_LABEL is used to mark points that are not labelled. + * + * The two label arrays A and B induce two sets of groups over points 0..N-1. + * If a point is labelled i in A and j in B and the mask is true for this + * point, then i and j are equivalent labels and their groups are merged by + * relabeling the elements of both groups to have the same label. The new label + * is the smaller one from the original labels. + * It is required that if the mask is true for a point, this point is labelled + * (i.e its label is different than the special value MAX_LABEL). + * + * One use case is finding connected components: the two input label arrays can + * represent the connected components of graphs G_A and G_B, and the output + * would be the connected components labels of G_A \union G_B. + * + * @param[inout] labels_a First input, and output label array (in-place) + * @param[in] labels_b Second input label array + * @param[in] mask Core point mask + * @param[out] R label equivalence map + * @param[in] m Working flag + * @param[in] N Number of points in the dataset + * @param[in] stream CUDA stream + */ +template +void merge_labels(value_idx* labels_a, + const value_idx* labels_b, + const bool* mask, + value_idx* R, + bool* m, + value_idx N, + cudaStream_t stream) +{ + detail::merge_labels(labels_a, labels_b, mask, R, m, N, stream); +} + +}; // namespace label +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/label/merge_labels.hpp b/cpp/include/raft/label/merge_labels.hpp index 5ba8fe8a27..2bf2fa830b 100644 --- a/cpp/include/raft/label/merge_labels.hpp +++ b/cpp/include/raft/label/merge_labels.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,9 @@ * limitations under the License. */ +#ifndef __MERGE_LABELS_H +#define __MERGE_LABELS_H + #pragma once #include @@ -63,4 +66,6 @@ void merge_labels(value_idx* labels_a, } }; // namespace label -}; // namespace raft \ No newline at end of file +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/lap/detail/d_structs.h b/cpp/include/raft/lap/detail/d_structs.h index e488dc528f..74679d64ce 100644 --- a/cpp/include/raft/lap/detail/d_structs.h +++ b/cpp/include/raft/lap/detail/d_structs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * Copyright 2020 KETAN DATE & RAKESH NAGI * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cpp/include/raft/lap/detail/lap_functions.cuh b/cpp/include/raft/lap/detail/lap_functions.cuh index 6c6b09e5d8..3a801ff060 100644 --- a/cpp/include/raft/lap/detail/lap_functions.cuh +++ b/cpp/include/raft/lap/detail/lap_functions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
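A usage sketch for the `merge_labels` wrapper above; all buffers, sizes, and names are hypothetical:
@code{.cpp}
#include <raft/label/merge_labels.cuh>
#include <rmm/device_uvector.hpp>

// labels_a / labels_b: two labellings of the same N points; mask marks the
// core points that are allowed to tie a group in A to a group in B.
rmm::device_uvector<int> R(N, stream);   // label equivalence map (scratch)
rmm::device_uvector<bool> m(1, stream);  // single working flag
raft::label::merge_labels(labels_a, labels_b, mask, R.data(), m.data(), N, stream);
// labels_a now holds the merged labelling; merged groups keep the smaller
// of their two original labels.
@endcode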
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION. * Copyright 2020 KETAN DATE & RAKESH NAGI * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cpp/include/raft/lap/detail/lap_kernels.cuh b/cpp/include/raft/lap/detail/lap_kernels.cuh index b61d0bd269..e98b246733 100644 --- a/cpp/include/raft/lap/detail/lap_kernels.cuh +++ b/cpp/include/raft/lap/detail/lap_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * Copyright 2020 KETAN DATE & RAKESH NAGI * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.cuh similarity index 99% rename from cpp/include/raft/lap/lap.hpp rename to cpp/include/raft/lap/lap.cuh index 2350ebcddf..5f72ca27c8 100644 --- a/cpp/include/raft/lap/lap.hpp +++ b/cpp/include/raft/lap/lap.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * Copyright 2020 KETAN DATE & RAKESH NAGI * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh new file mode 100644 index 0000000000..92152a8c03 --- /dev/null +++ b/cpp/include/raft/linalg/add.cuh @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ADD_H +#define __ADD_H + +#pragma once + +#include "detail/add.cuh" + +namespace raft { +namespace linalg { + +using detail::adds_scalar; + +/** + * @brief Elementwise scalar add operation on the input buffer + * + * @tparam InT input data-type. Also the data-type upon which the math ops + * will be performed + * @tparam OutT output data-type + * @tparam IdxType Integer type used for addressing + * + * @param out the output buffer + * @param in the input buffer + * @param scalar the scalar used in the operations + * @param len number of elements in the input buffer + * @param stream cuda stream where to launch work + */ +template +void addScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) +{ + detail::addScalar(out, in, scalar, len, stream); +} + +/** + * @brief Elementwise add operation on the input buffers + * @tparam InT input data-type.
Also the data-type upon which the math ops + * will be performed + * @tparam OutT output data-type + * @tparam IdxType Integer type used for addressing + * + * @param out the output buffer + * @param in1 the first input buffer + * @param in2 the second input buffer + * @param len number of elements in the input buffers + * @param stream cuda stream where to launch work + */ +template +void add(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) +{ + detail::add(out, in1, in2, len, stream); +} + +/** Subtract the single value pointed to by the singleScalarDev parameter, which resides in device + * memory, from inDev[i] and write the result to outDev[i] + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used for addressing + * @param outDev the output buffer + * @param inDev the input buffer + * @param singleScalarDev pointer to the scalar located in device memory + * @param len number of elements in the input and output buffer + * @param stream cuda stream + */ +template +void addDevScalar(math_t* outDev, + const math_t* inDev, + const math_t* singleScalarDev, + IdxType len, + cudaStream_t stream) +{ + detail::addDevScalar(outDev, inDev, singleScalarDev, len, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/add.hpp b/cpp/include/raft/linalg/add.hpp index 2f999a45d2..32c7f68459 100644 --- a/cpp/include/raft/linalg/add.hpp +++ b/cpp/include/raft/linalg/add.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __ADD_H +#define __ADD_H #pragma once @@ -84,3 +91,5 @@ void addDevScalar(math_t* outDev, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/axpy.cuh b/cpp/include/raft/linalg/axpy.cuh new file mode 100644 index 0000000000..2e23047b5a --- /dev/null +++ b/cpp/include/raft/linalg/axpy.cuh @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
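A quick sketch of the elementwise entry points exposed by the new `add.cuh` above; `out`, `in`, `in1`, `in2`, `len`, and `stream` are assumed device buffers/values, not part of this patch:
@code{.cpp}
#include <raft/linalg/add.cuh>

// out[i] = in[i] + 3.0f for len elements (all pointers on the device)
raft::linalg::addScalar(out, in, 3.0f, len, stream);

// out[i] = in1[i] + in2[i]
raft::linalg::add(out, in1, in2, len, stream);
@endcode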
+ */ +#ifndef __AXPY_H +#define __AXPY_H + +#pragma once + +#include "detail/axpy.cuh" + +namespace raft::linalg { + +/** + * @brief the wrapper of cublas axpy function + * It computes the following equation: y = alpha * x + y + * + * @tparam T the element type + * @tparam DevicePointerMode whether pointers alpha, beta point to device memory + * @param [in] handle raft handle + * @param [in] n number of elements in x and y + * @param [in] alpha host or device scalar + * @param [in] x vector of length n + * @param [in] incx stride between consecutive elements of x + * @param [inout] y vector of length n + * @param [in] incy stride between consecutive elements of y + * @param [in] stream + */ +template +void axpy(const raft::handle_t& handle, + const int n, + const T* alpha, + const T* x, + const int incx, + T* y, + const int incy, + cudaStream_t stream) +{ + detail::axpy(handle, n, alpha, x, incx, y, incy, stream); +} + +} // namespace raft::linalg + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/axpy.hpp b/cpp/include/raft/linalg/axpy.hpp index 5a5a873132..921ed3f89b 100644 --- a/cpp/include/raft/linalg/axpy.hpp +++ b/cpp/include/raft/linalg/axpy.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __AXPY_H +#define __AXPY_H #pragma once -#include "detail/axpy.hpp" +#include "detail/axpy.cuh" namespace raft::linalg { @@ -49,3 +56,5 @@ void axpy(const raft::handle_t& handle, } } // namespace raft::linalg + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh new file mode 100644 index 0000000000..a85bf698f7 --- /dev/null +++ b/cpp/include/raft/linalg/binary_op.cuh @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
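Sketch of calling the new `axpy.cuh` wrapper above, assuming the default host-pointer mode for `alpha`; `handle`, `x`, `y`, `n`, and `stream` are illustrative:
@code{.cpp}
#include <raft/linalg/axpy.cuh>

// y = 2.5 * x + y for device vectors of length n with unit strides;
// alpha lives on the host (the default pointer mode).
const float alpha = 2.5f;
raft::linalg::axpy(handle, n, &alpha, x, 1, y, 1, stream);
@endcode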
+ */ +#ifndef __BINARY_OP_H +#define __BINARY_OP_H + +#pragma once + +#include "detail/binary_op.cuh" + +#include + +namespace raft { +namespace linalg { + +/** + * @brief perform element-wise binary operation on the input arrays + * @tparam InType input data-type + * @tparam Lambda the device-lambda performing the actual operation + * @tparam OutType output data-type + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads-per-block in the final kernel launched + * @param out the output array + * @param in1 the first input array + * @param in2 the second input array + * @param len number of elements in the input array + * @param op the device-lambda + * @param stream cuda stream where to launch work + * @note Lambda must be a functor with the following signature: + * `OutType func(const InType& val1, const InType& val2);` + */ +template +void binaryOp( + OutType* out, const InType* in1, const InType* in2, IdxType len, Lambda op, cudaStream_t stream) +{ + detail::binaryOp(out, in1, in2, len, op, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.hpp b/cpp/include/raft/linalg/binary_op.hpp index 5c73b6d3c5..468c278909 100644 --- a/cpp/include/raft/linalg/binary_op.hpp +++ b/cpp/include/raft/linalg/binary_op.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __BINARY_OP_H +#define __BINARY_OP_H #pragma once @@ -52,3 +59,5 @@ void binaryOp( }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/cholesky_r1_update.cuh b/cpp/include/raft/linalg/cholesky_r1_update.cuh new file mode 100644 index 0000000000..7d22d6bcf7 --- /dev/null +++ b/cpp/include/raft/linalg/cholesky_r1_update.cuh @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CHOLESKY_R1_UPDATE_H +#define __CHOLESKY_R1_UPDATE_H + +#pragma once + +#include "detail/cholesky_r1_update.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief Rank 1 update of Cholesky decomposition. + * + * This method is useful if an algorithm iteratively builds up matrix A, and + * the Cholesky decomposition of A is required at each step. + * + * On entry, L is the Cholesky decomposition of matrix A, where both A and L + * have size n-1 x n-1. We are interested in the Cholesky decomposition of a new + * matrix A', which we get by adding a row and column to A. In Python notation: + * - A'[0:n-1, 0:n-1] = A; + * - A'[:,n-1] = A[n-1,:] = A_new + * + * On entry, the new column A_new, is stored as the n-th column of L if uplo == + * CUBLAS_FILL_MODE_UPPER, else A_new is stored as the n-th row of L. + * + * On exit L contains the Cholesky decomposition of A'. 
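Sketch of the `binaryOp` wrapper above with a device lambda; the buffers are assumptions, and the extended-lambda syntax requires nvcc's `--extended-lambda` flag:
@code{.cpp}
#include <raft/linalg/binary_op.cuh>

// out[i] = in1[i] * in2[i]; the lambda runs on the device, hence the
// __device__ annotation on the callable.
raft::linalg::binaryOp(
  out, in1, in2, len, [] __device__(float a, float b) { return a * b; }, stream);
@endcode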
In practice the elements + * of A_new are overwritten with the new row/column of the L matrix. + * + * The uplo parameter is used to select the matrix layout. + * If (uplo != CUBLAS_FILL_MODE_UPPER) then the input arg L stores the + * lower triangular matrix L, so that A = L * L.T. Otherwise the input arg L + * stores an upper triangular matrix U: A = U.T * U. + * + * On exit L will be updated to store the Cholesky decomposition of A'. + * + * If the matrix is not positive definite, or is very ill-conditioned, then the + * new diagonal element of L would be NaN. In such a case an exception is thrown. + * The eps argument can be used to override this behavior: if eps >= 0 then + * the diagonal element is replaced by eps in case the diagonal is NaN or + * smaller than eps. Note: for an iterative solver it is probably better to + * stop early in case of error, rather than relying on the eps parameter. + * + * Examples: + * + * - Lower triangular factorization: + * @code{.cpp} + * // Initialize arrays + * int ld_L = n_rows; + * int n_bytes; + * rmm::device_uvector<math_t> L(ld_L * n_rows, stream); + * raft::linalg::choleskyRank1Update(handle, L.data(), n_rows, ld_L, nullptr, + * &n_bytes, CUBLAS_FILL_MODE_LOWER, + * stream); + * rmm::device_uvector<char> workspace(n_bytes, stream); + * + * for (int rank = 1; rank <= n_rows; rank++) { + * // Calculate a new row/column of matrix A into A_new + * // ... + * // Copy new row to L[rank-1,:] + * RAFT_CUBLAS_TRY(cublasCopy(handle.get_cublas_handle(), rank - 1, A_new, 1, + * L.data() + rank - 1, ld_L, stream)); + * // Update Cholesky factorization + * raft::linalg::choleskyRank1Update( + * handle, L.data(), rank, ld_L, workspace.data(), &n_bytes, + * CUBLAS_FILL_MODE_LOWER, stream); + * } + * // Now L stores the Cholesky decomposition of A: A = L * L.T + * @endcode + * + * - Upper triangular factorization: + * @code{.cpp} + * // Initialize arrays + * int ld_U = n_rows; + * int n_bytes; + * rmm::device_uvector<math_t> U(ld_U * n_rows, stream); + * raft::linalg::choleskyRank1Update(handle, U.data(), n_rows, ld_U, nullptr, + * &n_bytes, CUBLAS_FILL_MODE_UPPER, + * stream); + * rmm::device_uvector<char> workspace(n_bytes, stream); + * + * for (int n = 1; n <= n_rows; n++) { + * // Calculate a new row/column of matrix A into array A_new + * // ... + * // Copy new row to U[:,n-1] (column major layout) + * raft::copy(U.data() + ld_U * (n - 1), A_new, n - 1, stream); + * // + * // Update Cholesky factorization + * raft::linalg::choleskyRank1Update( + * handle, U.data(), n, ld_U, workspace.data(), &n_bytes, + * CUBLAS_FILL_MODE_UPPER, stream); + * } + * // Now U stores the Cholesky decomposition of A: A = U.T * U + * @endcode + * + * @param handle RAFT handle (used to retrieve cuBLAS handles). + * @param L device array to store the triangular matrix L, and the new + * column of A, in column major format, size [n*n] + * @param n number of elements in the new row. + * @param ld stride of columns in L + * @param workspace device pointer to workspace; shall be nullptr or an array + * of size [n_bytes]. + * @param n_bytes size of workspace is returned here if workspace==nullptr. + * @param uplo indicates whether L is stored as an upper or lower triangular + * matrix (CUBLAS_FILL_MODE_UPPER or CUBLAS_FILL_MODE_LOWER) + * @param stream CUDA stream + * @param eps numerical parameter that can act as a regularizer for ill + * conditioned systems. Negative values mean no regularization.
+ */ +template +void choleskyRank1Update(const raft::handle_t& handle, + math_t* L, + int n, + int ld, + void* workspace, + int* n_bytes, + cublasFillMode_t uplo, + cudaStream_t stream, + math_t eps = -1) +{ + detail::choleskyRank1Update(handle, L, n, ld, workspace, n_bytes, uplo, stream, eps); +} +}; // namespace linalg +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/cholesky_r1_update.hpp b/cpp/include/raft/linalg/cholesky_r1_update.hpp index 583c65c50e..b55f5d06da 100644 --- a/cpp/include/raft/linalg/cholesky_r1_update.hpp +++ b/cpp/include/raft/linalg/cholesky_r1_update.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __CHOLESKY_R1_UPDATE_H +#define __CHOLESKY_R1_UPDATE_H #pragma once -#include "detail/cholesky_r1_update.hpp" +#include "detail/cholesky_r1_update.cuh" namespace raft { namespace linalg { @@ -132,3 +139,5 @@ void choleskyRank1Update(const raft::handle_t& handle, } }; // namespace linalg }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh new file mode 100644 index 0000000000..03477f72d6 --- /dev/null +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __COALESCED_REDUCTION_H +#define __COALESCED_REDUCTION_H + +#pragma once + +#include "detail/coalesced_reduction.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief Compute reduction of the input matrix along the leading dimension + * + * @tparam InType the data type of the input + * @tparam OutType the data type of the output (as well as the data type for + * which reduction is performed) + * @tparam IdxType data type of the indices of the array + * @tparam MainLambda Unary lambda applied while acculumation (eg: L1 or L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*MainLambda)(InType, IdxType);
+ * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) + * It must be a 'callable' supporting the following input and output: + * OutType (*ReduceLambda)(OutType, OutType);
+ * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) + * It must be a 'callable' supporting the following input and output: + * OutType (*FinalLambda)(OutType);
+ * @param dots the output reduction vector + * @param data the input matrix + * @param D leading dimension of data + * @param N second dimension data + * @param init initial value to use for the reduction + * @param main_op elementwise operation to apply before reduction + * @param reduce_op binary reduction operation + * @param final_op elementwise operation to apply before storing results + * @param inplace reduction result added inplace or overwrites old values? + * @param stream cuda stream where to launch work + */ +template , + typename ReduceLambda = raft::Sum, + typename FinalLambda = raft::Nop> +void coalescedReduction(OutType* dots, + const InType* data, + int D, + int N, + OutType init, + cudaStream_t stream, + bool inplace = false, + MainLambda main_op = raft::Nop(), + ReduceLambda reduce_op = raft::Sum(), + FinalLambda final_op = raft::Nop()) +{ + detail::coalescedReduction(dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/coalesced_reduction.hpp b/cpp/include/raft/linalg/coalesced_reduction.hpp index 0f1ca9202d..4b9e5d262f 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.hpp +++ b/cpp/include/raft/linalg/coalesced_reduction.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __COALESCED_REDUCTION_H +#define __COALESCED_REDUCTION_H #pragma once @@ -70,3 +77,5 @@ void coalescedReduction(OutType* dots, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh new file mode 100644 index 0000000000..5ccbd15c3d --- /dev/null +++ b/cpp/include/raft/linalg/contractions.cuh @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CONTRACTIONS_H +#define __CONTRACTIONS_H + +#pragma once + +#include "detail/contractions.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief This is the central enum that should be used to configure the perf + * landscape of the Contraction kernel. + * + * Main goal of this Policy struct is to provide sufficient knobs to tune the + * perf of Contraction kernel, as and when we see matrices of different shapes. + * + * @tparam DataT the IO and math datatype + * @tparam _veclen number of k-elements loaded by each thread for every LDG call + * it makes. This should be configured based on the input 'k' + * value and the input data type. For eg: if DataT = float and + * k is multiples of 4, then setting this to 4 gives the best + * LDG pattern. Possible values are {1, 2, 4}. + * @tparam _kblk number of k-elements operated upon per main-loop iteration. 
+ * Therefore total number of main-loop iterations will be + * `ceil(k/_kblk)`. This must be multiples of `_veclen`. Do note + * that bigger this value, the greater shared mem requirement. + * @tparam _rpt Defines the number of rows that a given thread accumulates on. + * This directly results in increased register pressure. This + * also is used to compute the number of m-elements worked upon + * by each thread block. + * @tparam _cpt Defines the number of cols that a given thread accumulates on. + * This directly results in increased register pressure. This + * also is used to compute the number of n-elements worked upon + * by each thread block. + * @tparam _tr Number of threads working on the same output column. This is + * used to compute the number of m-elements worked upon by each + * thread block. This also determines the number of threads per + * thread block + * @tparam _tc Number of threads working on the same output row. This is + * used to compute the number of m-elements worked upon by each + * thread block. This also determines the number of threads per + * thread block + */ +template +struct KernelPolicy { + enum { + /** number of elements along K worked upon per main loop iteration */ + Kblk = _kblk, + /** number of elements loaded per LDG */ + Veclen = _veclen, + /** number of rows a thread works on for accumulation */ + AccRowsPerTh = _rpt, + /** number of cols a thread works on for accumulation */ + AccColsPerTh = _cpt, + /** number of threads working the same output col */ + AccThRows = _tr, + /** number of threads working the same output row */ + AccThCols = _tc, + /** total threads per block */ + Nthreads = AccThRows * AccThCols, + /** output tile size along rows */ + Mblk = AccRowsPerTh * AccThRows, + /** output tile size along cols */ + Nblk = AccColsPerTh * AccThCols, + /** number of threads loading a single row */ + LdgThRow = Kblk / Veclen, + /** number of LDGs issued by a single thread for X */ + LdgPerThX = Mblk * LdgThRow / Nthreads, + /** number of LDGs issued by a single thread for Y */ + LdgPerThY = Nblk * LdgThRow / Nthreads, + /** number of rows of X covered per LDG */ + LdgRowsX = Mblk / LdgPerThX, + /** number of rows of Y covered per LDG */ + LdgRowsY = Nblk / LdgPerThY, + /** stride for accessing X/Y data in shared mem */ + SmemStride = Kblk + Veclen, + /** size of one page for storing X data */ + SmemPageX = SmemStride * Mblk, + /** size of one page for storing Y data */ + SmemPageY = SmemStride * Nblk, + /** size of one smem page */ + SmemPage = SmemPageX + SmemPageY, + /** size (in B) for smem needed */ + SmemSize = 2 * SmemPage * sizeof(DataT), + }; // enum + +}; // struct KernelPolicy + +template +struct ColKernelPolicy { + enum { + /** number of elements along K worked upon per main loop iteration */ + Kblk = _kblk, + /** number of elements loaded per LDG */ + Veclen = _veclen, + /** number of rows a thread works on for accumulation */ + AccRowsPerTh = _rpt, + /** number of cols a thread works on for accumulation */ + AccColsPerTh = _cpt, + /** number of threads working the same output col */ + AccThRows = _tr, + /** number of threads working the same output row */ + AccThCols = _tc, + /** total threads per block */ + Nthreads = AccThRows * AccThCols, + /** output tile size along rows */ + Mblk = AccRowsPerTh * AccThRows, + /** output tile size along cols */ + Nblk = AccColsPerTh * AccThCols, + /** number of threads loading a single col */ + LdgThRow = Mblk / Veclen, + /** number of LDGs issued by a single thread for X */ + LdgPerThX = Kblk * 
LdgThRow / Nthreads, + /** number of LDGs issued by a single thread for Y */ + LdgPerThY = Kblk * LdgThRow / Nthreads, + /** number of rows of X covered per LDG */ + LdgRowsX = Kblk / LdgPerThX, + /** number of rows of Y covered per LDG */ + LdgRowsY = Kblk / LdgPerThY, + /** stride for accessing X/Y data in shared mem */ + SmemStride = Mblk + Veclen, + /** size of one page for storing X data */ + SmemPageX = SmemStride * Kblk, + /** size of one page for storing Y data */ + SmemPageY = SmemStride * Kblk, + /** size of one smem page */ + SmemPage = SmemPageX + SmemPageY, + /** size (in B) for smem needed */ + SmemSize = 2 * SmemPage * sizeof(DataT), + }; // colMajor enum + static_assert(Mblk == Nblk, "Mblk should be equal to Nblk"); +}; +/** + * @defgroup Policy4x4 16 elements per thread Policy with k-block = 32 + * @{ + */ +template +struct Policy4x4 { +}; + +template +struct Policy4x4 { + typedef KernelPolicy Policy; + typedef ColKernelPolicy ColPolicy; +}; + +template +struct Policy4x4 { + typedef KernelPolicy Policy; + typedef ColKernelPolicy ColPolicy; +}; +/** @} */ + +/** + * @defgroup Policy2x8 16 elements per thread Policy with k-block = 16 + * @{ + */ +template +struct Policy2x8 { +}; + +template +struct Policy2x8 { + typedef KernelPolicy Policy; + typedef ColKernelPolicy ColPolicy; +}; + +template +struct Policy2x8 { + // this is not used just for keeping compiler happy. + typedef KernelPolicy Policy; + typedef ColKernelPolicy ColPolicy; +}; +/** @} */ + +/** + * @brief Base class for gemm-like NT contractions + * + * This class does not provide any arithmetic operations, but only provides the + * memory-related operations of loading the `x` and `y` matrix blocks from the + * global memory into shared memory and then from shared into registers. Thus, + * this class acts as a basic building block for further composing gemm-like NT + * contractions on input matrices which are row-major (and so does the output) + * + * @tparam DataT IO and math data type + * @tparam IdxT indexing type + * @tparam Policy policy used to customize memory access behavior. + * See documentation for `KernelPolicy` to know more. + */ +using detail::Contractions_NT; + +} // namespace linalg +} // namespace raft + +#endif diff --git a/cpp/include/raft/linalg/contractions.hpp b/cpp/include/raft/linalg/contractions.hpp index e317588b1d..84c86b93a4 100644 --- a/cpp/include/raft/linalg/contractions.hpp +++ b/cpp/include/raft/linalg/contractions.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __CONTRACTIONS_H +#define __CONTRACTIONS_H #pragma once @@ -205,3 +212,5 @@ using detail::Contractions_NT; } // namespace linalg } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/cublas_macros.h b/cpp/include/raft/linalg/cublas_macros.h new file mode 100644 index 0000000000..1cb5cfc81a --- /dev/null +++ b/cpp/include/raft/linalg/cublas_macros.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
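To make the derived `KernelPolicy` constants above concrete, here is the arithmetic for one hypothetical instantiation; the numeric template values (veclen=4, kblk=32, rpt=cpt=4, tr=tc=16) are chosen purely for illustration and are not asserted anywhere in this patch:
@code{.cpp}
#include <raft/linalg/contractions.cuh>

using P = raft::linalg::KernelPolicy<float, 4, 32, 4, 4, 16, 16>;

static_assert(P::Nthreads == 256, "16 x 16 threads per block");
static_assert(P::Mblk == 64 && P::Nblk == 64, "4 rows/cols per thread x 16 threads");
static_assert(P::LdgThRow == 8, "32 / 4 vectorized loads per row");
static_assert(P::SmemStride == 36, "Kblk padded by Veclen");
// Two double-buffered pages, each a (36 x 64) X tile plus a (36 x 64) Y
// tile of floats: 2 * (2304 + 2304) * 4 = 36864 bytes.
static_assert(P::SmemSize == 36864, "36 KiB of shared memory");
@endcode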
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +///@todo: enable this once we have logger enabled +//#include + +#include + +#define _CUBLAS_ERR_TO_STR(err) \ + case err: return #err + +namespace raft { + +/** + * @brief Exception thrown when a cuBLAS error is encountered. + */ +struct cublas_error : public raft::exception { + explicit cublas_error(char const* const message) : raft::exception(message) {} + explicit cublas_error(std::string const& message) : raft::exception(message) {} +}; + +namespace linalg { +namespace detail { + +inline const char* cublas_error_to_string(cublasStatus_t err) +{ + switch (err) { + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_SUCCESS); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_NOT_INITIALIZED); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_ALLOC_FAILED); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_INVALID_VALUE); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_ARCH_MISMATCH); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_MAPPING_ERROR); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_EXECUTION_FAILED); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_INTERNAL_ERROR); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_NOT_SUPPORTED); + _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_LICENSE_ERROR); + default: return "CUBLAS_STATUS_UNKNOWN"; + }; +} + +} // namespace detail +} // namespace linalg +} // namespace raft + +#undef _CUBLAS_ERR_TO_STR + +/** + * @brief Error checking macro for cuBLAS runtime API functions. + * + * Invokes a cuBLAS runtime API function call, if the call does not return + * CUBLAS_STATUS_SUCCESS, throws an exception detailing the cuBLAS error that occurred + */ +#define RAFT_CUBLAS_TRY(call) \ + do { \ + cublasStatus_t const status = (call); \ + if (CUBLAS_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "cuBLAS error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + raft::linalg::detail::cublas_error_to_string(status)); \ + throw raft::cublas_error(msg); \ + } \ + } while (0) + +// FIXME: Remove after consumers rename +#ifndef CUBLAS_TRY +#define CUBLAS_TRY(call) RAFT_CUBLAS_TRY(call) +#endif + +// /** +// * @brief check for cuda runtime API errors but log error instead of raising +// * exception. +// */ +#define RAFT_CUBLAS_TRY_NO_THROW(call) \ + do { \ + cublasStatus_t const status = call; \ + if (CUBLAS_STATUS_SUCCESS != status) { \ + printf("CUBLAS call='%s' at file=%s line=%d failed with %s\n", \ + #call, \ + __FILE__, \ + __LINE__, \ + raft::linalg::detail::cublas_error_to_string(status)); \ + } \ + } while (0) + +/** FIXME: remove after cuml rename */ +#ifndef CUBLAS_CHECK +#define CUBLAS_CHECK(call) CUBLAS_TRY(call) +#endif + +/** FIXME: remove after cuml rename */ +#ifndef CUBLAS_CHECK_NO_THROW +#define CUBLAS_CHECK_NO_THROW(call) RAFT_CUBLAS_TRY_NO_THROW(call) +#endif diff --git a/cpp/include/raft/linalg/cusolver_macros.h b/cpp/include/raft/linalg/cusolver_macros.h new file mode 100644 index 0000000000..6db0577509 --- /dev/null +++ b/cpp/include/raft/linalg/cusolver_macros.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
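Usage sketch for the relocated cuBLAS error macros above; `handle`, `stream`, and `cublas_handle` are assumed surrounding variables:
@code{.cpp}
#include <raft/linalg/cublas_macros.h>

// Throws raft::cublas_error carrying the stringified status on failure:
RAFT_CUBLAS_TRY(cublasSetStream(handle.get_cublas_handle(), stream));

// Non-throwing flavor, e.g. for destructors and teardown paths:
RAFT_CUBLAS_TRY_NO_THROW(cublasDestroy(cublas_handle));
@endcode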
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +///@todo: enable this once logging is enabled +//#include +#include +#include + +#define _CUSOLVER_ERR_TO_STR(err) \ + case err: return #err; + +namespace raft { + +/** + * @brief Exception thrown when a cuSOLVER error is encountered. + */ +struct cusolver_error : public raft::exception { + explicit cusolver_error(char const* const message) : raft::exception(message) {} + explicit cusolver_error(std::string const& message) : raft::exception(message) {} +}; + +namespace linalg { + +inline const char* cusolver_error_to_string(cusolverStatus_t err) +{ + switch (err) { + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_SUCCESS); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_NOT_INITIALIZED); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_ALLOC_FAILED); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_INVALID_VALUE); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_ARCH_MISMATCH); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_EXECUTION_FAILED); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_INTERNAL_ERROR); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_ZERO_PIVOT); + _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_NOT_SUPPORTED); + default: return "CUSOLVER_STATUS_UNKNOWN"; + }; +} + +} // namespace linalg +} // namespace raft + +#undef _CUSOLVER_ERR_TO_STR + +/** + * @brief Error checking macro for cuSOLVER runtime API functions. + * + * Invokes a cuSOLVER runtime API function call, if the call does not return + * CUSolver_STATUS_SUCCESS, throws an exception detailing the cuSOLVER error that occurred + */ +#define RAFT_CUSOLVER_TRY(call) \ + do { \ + cusolverStatus_t const status = (call); \ + if (CUSOLVER_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "cuSOLVER error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + raft::linalg::detail::cusolver_error_to_string(status)); \ + throw raft::cusolver_error(msg); \ + } \ + } while (0) + +// FIXME: remove after consumer rename +#ifndef CUSOLVER_TRY +#define CUSOLVER_TRY(call) RAFT_CUSOLVER_TRY(call) +#endif + +// /** +// * @brief check for cuda runtime API errors but log error instead of raising +// * exception. 
+// */
+#define RAFT_CUSOLVER_TRY_NO_THROW(call)                               \
+  do {                                                                 \
+    cusolverStatus_t const status = call;                              \
+    if (CUSOLVER_STATUS_SUCCESS != status) {                           \
+      printf("CUSOLVER call='%s' at file=%s line=%d failed with %s\n", \
+             #call,                                                    \
+             __FILE__,                                                 \
+             __LINE__,                                                 \
+             raft::linalg::detail::cusolver_error_to_string(status));  \
+    }                                                                  \
+  } while (0)
+
+// FIXME: remove after cuml rename
+#ifndef CUSOLVER_CHECK
+#define CUSOLVER_CHECK(call) CUSOLVER_TRY(call)
+#endif
+
+#ifndef CUSOLVER_CHECK_NO_THROW
+#define CUSOLVER_CHECK_NO_THROW(call) RAFT_CUSOLVER_TRY_NO_THROW(call)
+#endif
diff --git a/cpp/include/raft/linalg/detail/add.cuh b/cpp/include/raft/linalg/detail/add.cuh
index 794a776dcf..652ffd2e86 100644
--- a/cpp/include/raft/linalg/detail/add.cuh
+++ b/cpp/include/raft/linalg/detail/add.cuh
@@ -19,8 +19,8 @@
 #include "functional.cuh"
 
 #include
-#include
-#include
+#include
+#include
 
 namespace raft {
 namespace linalg {
diff --git a/cpp/include/raft/linalg/detail/axpy.hpp b/cpp/include/raft/linalg/detail/axpy.cuh
similarity index 100%
rename from cpp/include/raft/linalg/detail/axpy.hpp
rename to cpp/include/raft/linalg/detail/axpy.cuh
diff --git a/cpp/include/raft/linalg/detail/cholesky_r1_update.hpp b/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
similarity index 98%
rename from cpp/include/raft/linalg/detail/cholesky_r1_update.hpp
rename to cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
index 48993886a6..df1fb0a1f3 100644
--- a/cpp/include/raft/linalg/detail/cholesky_r1_update.hpp
+++ b/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
@@ -18,9 +18,8 @@
 #include "cublas_wrappers.hpp"
 #include "cusolver_wrappers.hpp"
 
-#include
 #include
-#include
+#include
 
 namespace raft {
 namespace linalg {
diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
index 752235d246..7f9abc324e 100644
--- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
+++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include
 #include
 #include
 
diff --git a/cpp/include/raft/linalg/detail/divide.hpp b/cpp/include/raft/linalg/detail/divide.cuh
similarity index 96%
rename from cpp/include/raft/linalg/detail/divide.hpp
rename to cpp/include/raft/linalg/detail/divide.cuh
index c694529fb5..cb46ae76de 100644
--- a/cpp/include/raft/linalg/detail/divide.hpp
+++ b/cpp/include/raft/linalg/detail/divide.cuh
@@ -17,7 +17,7 @@
 #pragma once
 
 #include "functional.cuh"
-#include
+#include
 
 namespace raft {
 namespace linalg {
diff --git a/cpp/include/raft/linalg/detail/eig.hpp b/cpp/include/raft/linalg/detail/eig.cuh
similarity index 99%
rename from cpp/include/raft/linalg/detail/eig.hpp
rename to cpp/include/raft/linalg/detail/eig.cuh
index 8716b4de29..1d9a6bfa8f 100644
--- a/cpp/include/raft/linalg/detail/eig.hpp
+++ b/cpp/include/raft/linalg/detail/eig.cuh
@@ -18,10 +18,9 @@
 #include "cusolver_wrappers.hpp"
 
 #include
-#include
 #include
 #include
-#include
+#include
 #include
 #include
 
diff --git a/cpp/include/raft/linalg/detail/eltwise.hpp b/cpp/include/raft/linalg/detail/eltwise.cuh
similarity index 97%
rename from cpp/include/raft/linalg/detail/eltwise.hpp
rename to cpp/include/raft/linalg/detail/eltwise.cuh
index b15717f205..6d728c8b0f 100644
--- a/cpp/include/raft/linalg/detail/eltwise.hpp
+++ b/cpp/include/raft/linalg/detail/eltwise.cuh
@@ -18,8 +18,8 @@
 
 #include "functional.cuh"
 
-#include
-#include
+#include
+#include
 
 namespace raft {
 namespace linalg {
diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp
b/cpp/include/raft/linalg/detail/lanczos.cuh similarity index 100% rename from cpp/include/raft/linalg/detail/lanczos.hpp rename to cpp/include/raft/linalg/detail/lanczos.cuh diff --git a/cpp/include/raft/linalg/detail/lstsq.hpp b/cpp/include/raft/linalg/detail/lstsq.cuh similarity index 98% rename from cpp/include/raft/linalg/detail/lstsq.hpp rename to cpp/include/raft/linalg/detail/lstsq.cuh index 6553394cc4..3eef58b4df 100644 --- a/cpp/include/raft/linalg/detail/lstsq.hpp +++ b/cpp/include/raft/linalg/detail/lstsq.cuh @@ -18,20 +18,19 @@ #include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh index 94545e59f6..4cfccdcaa3 100644 --- a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh +++ b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/mean_squared_error.hpp b/cpp/include/raft/linalg/detail/mean_squared_error.cuh similarity index 96% rename from cpp/include/raft/linalg/detail/mean_squared_error.hpp rename to cpp/include/raft/linalg/detail/mean_squared_error.cuh index f0a9daebdb..5889314eea 100644 --- a/cpp/include/raft/linalg/detail/mean_squared_error.hpp +++ b/cpp/include/raft/linalg/detail/mean_squared_error.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/multiply.hpp b/cpp/include/raft/linalg/detail/multiply.cuh similarity index 96% rename from cpp/include/raft/linalg/detail/multiply.hpp rename to cpp/include/raft/linalg/detail/multiply.cuh index da06c23aed..ec3ec802de 100644 --- a/cpp/include/raft/linalg/detail/multiply.hpp +++ b/cpp/include/raft/linalg/detail/multiply.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/norm.hpp b/cpp/include/raft/linalg/detail/norm.cuh similarity index 99% rename from cpp/include/raft/linalg/detail/norm.hpp rename to cpp/include/raft/linalg/detail/norm.cuh index fcf98c7daf..03d03497e9 100644 --- a/cpp/include/raft/linalg/detail/norm.hpp +++ b/cpp/include/raft/linalg/detail/norm.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/qr.cuh b/cpp/include/raft/linalg/detail/qr.cuh index 81b1867a82..4aa843081e 100644 --- a/cpp/include/raft/linalg/detail/qr.cuh +++ b/cpp/include/raft/linalg/detail/qr.cuh @@ -18,7 +18,7 @@ #include "cublas_wrappers.hpp" #include "cusolver_wrappers.hpp" -#include +#include #include #include diff --git a/cpp/include/raft/linalg/detail/reduce.hpp b/cpp/include/raft/linalg/detail/reduce.cuh similarity index 95% rename from cpp/include/raft/linalg/detail/reduce.hpp rename to cpp/include/raft/linalg/detail/reduce.cuh index 94c8f5ba52..4d5fa87202 100644 --- a/cpp/include/raft/linalg/detail/reduce.hpp +++ b/cpp/include/raft/linalg/detail/reduce.cuh @@ -17,8 +17,8 @@ #pragma once #include -#include -#include +#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/rsvd.cuh b/cpp/include/raft/linalg/detail/rsvd.cuh index 3dc22a7e89..033534be55 100644 --- 
a/cpp/include/raft/linalg/detail/rsvd.cuh +++ b/cpp/include/raft/linalg/detail/rsvd.cuh @@ -17,14 +17,14 @@ #pragma once #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include diff --git a/cpp/include/raft/linalg/detail/strided_reduction.cuh b/cpp/include/raft/linalg/detail/strided_reduction.cuh index a0d1e2abaa..f7af9e88d6 100644 --- a/cpp/include/raft/linalg/detail/strided_reduction.cuh +++ b/cpp/include/raft/linalg/detail/strided_reduction.cuh @@ -19,7 +19,7 @@ #include "unary_op.cuh" #include #include -#include +#include #include namespace raft { diff --git a/cpp/include/raft/linalg/detail/subtract.cuh b/cpp/include/raft/linalg/detail/subtract.cuh index 23d5eded05..084c6d2fd3 100644 --- a/cpp/include/raft/linalg/detail/subtract.cuh +++ b/cpp/include/raft/linalg/detail/subtract.cuh @@ -17,8 +17,8 @@ #pragma once #include -#include -#include +#include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/detail/svd.hpp b/cpp/include/raft/linalg/detail/svd.cuh similarity index 98% rename from cpp/include/raft/linalg/detail/svd.hpp rename to cpp/include/raft/linalg/detail/svd.cuh index 5d349cd101..aa33dcb0a9 100644 --- a/cpp/include/raft/linalg/detail/svd.hpp +++ b/cpp/include/raft/linalg/detail/svd.cuh @@ -18,16 +18,16 @@ #include "cublas_wrappers.hpp" #include "cusolver_wrappers.hpp" -#include -#include -#include +#include +#include +#include #include #include #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/include/raft/linalg/detail/transpose.hpp b/cpp/include/raft/linalg/detail/transpose.cuh similarity index 100% rename from cpp/include/raft/linalg/detail/transpose.hpp rename to cpp/include/raft/linalg/detail/transpose.cuh diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh new file mode 100644 index 0000000000..820c42f0ea --- /dev/null +++ b/cpp/include/raft/linalg/divide.cuh @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef __DIVIDE_H
+#define __DIVIDE_H
+
+#pragma once
+
+#include "detail/divide.cuh"
+
+namespace raft {
+namespace linalg {
+
+using detail::divides_scalar;
+
+/**
+ * @defgroup ScalarOps Scalar operations on the input buffer
+ * @tparam math_t data-type upon which the math operation will be performed
+ * @tparam IdxType Integer type used for addressing
+ * @param out the output buffer
+ * @param in the input buffer
+ * @param scalar the scalar used in the operations
+ * @param len number of elements in the input buffer
+ * @param stream cuda stream where to launch work
+ * @{
+ */
+template <typename math_t, typename IdxType = int>
+void divideScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream)
+{
+  detail::divideScalar(out, in, scalar, len, stream);
+}
+/** @} */
+
+};  // end namespace linalg
+};  // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/divide.hpp b/cpp/include/raft/linalg/divide.hpp
index 6c8480bf19..88b919b92a 100644
--- a/cpp/include/raft/linalg/divide.hpp
+++ b/cpp/include/raft/linalg/divide.hpp
@@ -13,10 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __DIVIDE_H
+#define __DIVIDE_H
 
 #pragma once
 
-#include "detail/divide.hpp"
+#include "detail/divide.cuh"
 
 namespace raft {
 namespace linalg {
@@ -43,3 +50,5 @@ void divideScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cud
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh
new file mode 100644
index 0000000000..f1f02dc13e
--- /dev/null
+++ b/cpp/include/raft/linalg/eig.cuh
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __EIG_H
+#define __EIG_H
+
+#pragma once
+
+#include "detail/eig.cuh"
+
+namespace raft {
+namespace linalg {
+
+/**
+ * @defgroup eig Eigen Decomposition Methods
+ * @{
+ */
+
+/**
+ * @brief eig decomp with divide and conquer method for the column-major
+ * symmetric matrices
+ * @param handle raft handle
+ * @param in the input buffer (symmetric matrix that has real eig values and
+ * vectors)
+ * @param n_rows: number of rows of the input
+ * @param n_cols: number of cols of the input
+ * @param eig_vectors: eigenvectors
+ * @param eig_vals: eigen values
+ * @param stream cuda stream
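+ *
+ * Usage sketch (buffers, sizes, and the `cov` matrix are hypothetical, for
+ * illustration only):
+ * @code{.cpp}
+ *   // eigendecomposition of an n x n column-major symmetric device matrix `cov`
+ *   rmm::device_uvector<float> eig_vectors(n * n, stream);
+ *   rmm::device_uvector<float> eig_vals(n, stream);
+ *   raft::linalg::eigDC(handle, cov, n, n, eig_vectors.data(), eig_vals.data(), stream);
+ * @endcode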
+ */
+template <typename math_t>
+void eigDC(const raft::handle_t& handle,
+           const math_t* in,
+           std::size_t n_rows,
+           std::size_t n_cols,
+           math_t* eig_vectors,
+           math_t* eig_vals,
+           cudaStream_t stream)
+{
+  detail::eigDC(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream);
+}
+
+using detail::COPY_INPUT;
+using detail::EigVecMemUsage;
+using detail::OVERWRITE_INPUT;
+
+/**
+ * @brief eig sel decomp with divide and conquer method for the column-major
+ * symmetric matrices
+ * @param handle raft handle
+ * @param in the input buffer (symmetric matrix that has real eig values and
+ * vectors)
+ * @param n_rows: number of rows of the input
+ * @param n_cols: number of cols of the input
+ * @param n_eig_vals: number of eigenvectors to be generated
+ * @param eig_vectors: eigenvectors
+ * @param eig_vals: eigen values
+ * @param memUsage: the memory selection for eig vector output
+ * @param stream cuda stream
+ */
+template <typename math_t>
+void eigSelDC(const raft::handle_t& handle,
+              math_t* in,
+              int n_rows,
+              int n_cols,
+              int n_eig_vals,
+              math_t* eig_vectors,
+              math_t* eig_vals,
+              EigVecMemUsage memUsage,
+              cudaStream_t stream)
+{
+  detail::eigSelDC(handle, in, n_rows, n_cols, n_eig_vals, eig_vectors, eig_vals, memUsage, stream);
+}
+
+/**
+ * @brief overloaded function for eig decomp with Jacobi method for the
+ * column-major symmetric matrices (in parameter)
+ * @param handle: raft handle
+ * @param in: input matrix
+ * @param n_rows: number of rows of the input
+ * @param n_cols: number of cols of the input
+ * @param eig_vectors: eigenvectors
+ * @param eig_vals: eigen values
+ * @param stream: stream on which this function will be run
+ * @param tol: error tolerance for the Jacobi method. Algorithm stops when the
+ * error is below tol
+ * @param sweeps: number of sweeps in the Jacobi algorithm; more sweeps yield
+ * better accuracy.
+ */
+template <typename math_t>
+void eigJacobi(const raft::handle_t& handle,
+               const math_t* in,
+               int n_rows,
+               int n_cols,
+               math_t* eig_vectors,
+               math_t* eig_vals,
+               cudaStream_t stream,
+               math_t tol = 1.e-7,
+               int sweeps = 15)
+{
+  detail::eigJacobi(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream, tol, sweeps);
+}
+/** @} */  // end of eig
+
+};  // end namespace linalg
+};  // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/eig.hpp b/cpp/include/raft/linalg/eig.hpp
index 5c465a3a41..9417b6fb3f 100644
--- a/cpp/include/raft/linalg/eig.hpp
+++ b/cpp/include/raft/linalg/eig.hpp
@@ -13,10 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __EIG_H
+#define __EIG_H
 
 #pragma once
 
-#include "detail/eig.hpp"
+#include "detail/eig.cuh"
 
 namespace raft {
 namespace linalg {
@@ -114,3 +121,5 @@ void eigJacobi(const raft::handle_t& handle,
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/eltwise.cuh b/cpp/include/raft/linalg/eltwise.cuh
new file mode 100644
index 0000000000..dbc06a4af3
--- /dev/null
+++ b/cpp/include/raft/linalg/eltwise.cuh
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ELTWISE_H +#define __ELTWISE_H + +#pragma once + +#include "detail/eltwise.cuh" + +namespace raft { +namespace linalg { + +using detail::adds_scalar; + +/** + * @defgroup ScalarOps Scalar operations on the input buffer + * @tparam InType data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in the input buffer + * @param scalar the scalar used in the operations + * @param len number of elements in the input buffer + * @param stream cuda stream where to launch work + * @{ + */ +template +void scalarAdd(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) +{ + detail::scalarAdd(out, in, scalar, len, stream); +} + +using detail::multiplies_scalar; + +template +void scalarMultiply(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) +{ + detail::scalarMultiply(out, in, scalar, len, stream); +} +/** @} */ + +/** + * @defgroup BinaryOps Element-wise binary operations on the input buffers + * @tparam InType data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in1 the first input buffer + * @param in2 the second input buffer + * @param len number of elements in the input buffers + * @param stream cuda stream where to launch work + * @{ + */ +template +void eltwiseAdd( + OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) +{ + detail::eltwiseAdd(out, in1, in2, len, stream); +} + +template +void eltwiseSub( + OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) +{ + detail::eltwiseSub(out, in1, in2, len, stream); +} + +template +void eltwiseMultiply( + OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) +{ + detail::eltwiseMultiply(out, in1, in2, len, stream); +} + +template +void eltwiseDivide( + OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) +{ + detail::eltwiseDivide(out, in1, in2, len, stream); +} + +using detail::divides_check_zero; + +template +void eltwiseDivideCheckZero( + OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) +{ + detail::eltwiseDivideCheckZero(out, in1, in2, len, stream); +} +/** @} */ + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/eltwise.hpp b/cpp/include/raft/linalg/eltwise.hpp index 5c2a97b57d..0ebefc7c25 100644 --- a/cpp/include/raft/linalg/eltwise.hpp +++ b/cpp/include/raft/linalg/eltwise.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
+ */
+
+#ifndef __ELTWISE_H
+#define __ELTWISE_H
 
 #pragma once
 
-#include "detail/eltwise.hpp"
+#include "detail/eltwise.cuh"
 
 namespace raft {
 namespace linalg {
@@ -100,3 +107,5 @@ void eltwiseDivideCheckZero(
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/gemm.cuh b/cpp/include/raft/linalg/gemm.cuh
new file mode 100644
index 0000000000..9670834ff0
--- /dev/null
+++ b/cpp/include/raft/linalg/gemm.cuh
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __GEMM_H
+#define __GEMM_H
+
+#pragma once
+
+#include "detail/gemm.hpp"
+
+namespace raft {
+namespace linalg {
+
+/**
+ * @brief the wrapper of the cuBLAS gemm function
+ * It computes the following equation: C = alpha .* opA(A) * opB(B) + beta .* C
+ *
+ * @tparam math_t the element type
+ * @tparam DevicePointerMode whether pointers alpha, beta point to device memory
+ * @param [in] handle raft handle
+ * @param [in] trans_a cublas transpose op for A
+ * @param [in] trans_b cublas transpose op for B
+ * @param [in] m number of rows of C
+ * @param [in] n number of columns of C
+ * @param [in] k number of rows of opB(B) / number of columns of opA(A)
+ * @param [in] alpha host or device scalar
+ * @param [in] A such a matrix that the shape of column-major opA(A) is [m, k]
+ * @param [in] lda leading dimension of A
+ * @param [in] B such a matrix that the shape of column-major opB(B) is [k, n]
+ * @param [in] ldb leading dimension of B
+ * @param [in] beta host or device scalar
+ * @param [inout] C column-major matrix of size [m, n]
+ * @param [in] ldc leading dimension of C
+ * @param [in] stream
+ */
+template <typename math_t, bool DevicePointerMode = false>
+void gemm(const raft::handle_t& handle,
+          const bool trans_a,
+          const bool trans_b,
+          const int m,
+          const int n,
+          const int k,
+          const math_t* alpha,
+          const math_t* A,
+          const int lda,
+          const math_t* B,
+          const int ldb,
+          const math_t* beta,
+          math_t* C,
+          const int ldc,
+          cudaStream_t stream)
+{
+  detail::gemm<math_t, DevicePointerMode>(
+    handle, trans_a, trans_b, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream);
+}
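+
+// A usage sketch (hypothetical column-major device buffers A, B, C; alpha and
+// beta in host memory, as implied by the default DevicePointerMode = false):
+//   math_t alpha = 1, beta = 0;
+//   raft::linalg::gemm(handle, false, false, m, n, k, &alpha, A, m, B, k, &beta, C, m, stream);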
+
+/**
+ * @brief the wrapper of the cuBLAS gemm function
+ * It computes the following equation: C = alpha . opA(A) * opB(B) + beta . C
+ * @tparam math_t the type of input/output matrices
+ * @param handle raft handle
+ * @param a input matrix
+ * @param n_rows_a number of rows of A
+ * @param n_cols_a number of columns of A
+ * @param b input matrix
+ * @param c output matrix
+ * @param n_rows_c number of rows of C
+ * @param n_cols_c number of columns of C
+ * @param trans_a cublas transpose op for A
+ * @param trans_b cublas transpose op for B
+ * @param alpha scalar
+ * @param beta scalar
+ * @param stream cuda stream
+ */
+template <typename math_t>
+void gemm(const raft::handle_t& handle,
+          const math_t* a,
+          int n_rows_a,
+          int n_cols_a,
+          const math_t* b,
+          math_t* c,
+          int n_rows_c,
+          int n_cols_c,
+          cublasOperation_t trans_a,
+          cublasOperation_t trans_b,
+          math_t alpha,
+          math_t beta,
+          cudaStream_t stream)
+{
+  detail::gemm(
+    handle, a, n_rows_a, n_cols_a, b, c, n_rows_c, n_cols_c, trans_a, trans_b, alpha, beta, stream);
+}
+
+/**
+ * @brief the wrapper of the cuBLAS gemm function
+ * It computes the following equation: C = alpha . opA(A) * opB(B) + beta . C
+ * @tparam math_t the type of input/output matrices
+ * @param handle raft handle
+ * @param a input matrix
+ * @param n_rows_a number of rows of A
+ * @param n_cols_a number of columns of A
+ * @param b input matrix
+ * @param c output matrix
+ * @param n_rows_c number of rows of C
+ * @param n_cols_c number of columns of C
+ * @param trans_a cublas transpose op for A
+ * @param trans_b cublas transpose op for B
+ * @param stream cuda stream
+ */
+template <typename math_t>
+void gemm(const raft::handle_t& handle,
+          const math_t* a,
+          int n_rows_a,
+          int n_cols_a,
+          const math_t* b,
+          math_t* c,
+          int n_rows_c,
+          int n_cols_c,
+          cublasOperation_t trans_a,
+          cublasOperation_t trans_b,
+          cudaStream_t stream)
+{
+  detail::gemm(handle, a, n_rows_a, n_cols_a, b, c, n_rows_c, n_cols_c, trans_a, trans_b, stream);
+}
+
+/**
+ * @brief A wrapper for the cuBLAS GEMM function designed for handling all possible
+ * combinations of operand layouts.
+ * It computes the following equation: Z = alpha . X * Y + beta . Z
+ * @tparam T Data type of input/output matrices (float/double)
+ * @param handle raft handle
+ * @param z output matrix of size M rows x N columns
+ * @param x input matrix of size M rows x K columns
+ * @param y input matrix of size K rows x N columns
+ * @param _M number of rows of X and Z
+ * @param _N number of columns of Y and Z
+ * @param _K number of columns of X and rows of Y
+ * @param isZColMajor Storage layout of Z. true = col major, false = row major
+ * @param isXColMajor Storage layout of X. true = col major, false = row major
+ * @param isYColMajor Storage layout of Y. true = col major, false = row major
+ * @param stream cuda stream
+ * @param alpha scalar
+ * @param beta scalar
+ */
+template <typename T>
+void gemm(const raft::handle_t& handle,
+          T* z,
+          T* x,
+          T* y,
+          int _M,
+          int _N,
+          int _K,
+          bool isZColMajor,
+          bool isXColMajor,
+          bool isYColMajor,
+          cudaStream_t stream,
+          T alpha = T(1.0),
+          T beta = T(0.0))
+{
+  detail::gemm(
+    handle, z, x, y, _M, _N, _K, isZColMajor, isXColMajor, isYColMajor, stream, alpha, beta);
+}
+
+}  // end namespace linalg
+}  // end namespace raft
+
+#endif
diff --git a/cpp/include/raft/linalg/gemm.hpp b/cpp/include/raft/linalg/gemm.hpp
index f22d15e650..736590938b 100644
--- a/cpp/include/raft/linalg/gemm.hpp
+++ b/cpp/include/raft/linalg/gemm.hpp
@@ -13,6 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead. + */ + +#ifndef __GEMM_H +#define __GEMM_H #pragma once @@ -173,3 +180,5 @@ void gemm(const raft::handle_t& handle, } // end namespace linalg } // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh new file mode 100644 index 0000000000..26a6386148 --- /dev/null +++ b/cpp/include/raft/linalg/gemv.cuh @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __GEMV_H +#define __GEMV_H + +#pragma once + +#include "detail/gemv.hpp" + +namespace raft { +namespace linalg { + +/** + * @brief the wrapper of cublas gemv function + * It computes the following equation: y = alpha .* op(A) * x + beta .* y + * + * @tparam math_t the element type + * @tparam DevicePointerMode whether pointers alpha, beta point to device memory + * @param [in] handle raft handle + * @param [in] trans_a cublas transpose op for A + * @param [in] m number of rows of A + * @param [in] n number of columns of A + * @param [in] alpha host or device scalar + * @param [in] A column-major matrix of size [m, n] + * @param [in] lda leading dimension of A + * @param [in] x vector of length n if trans_a else m + * @param [in] incx stride between consecutive elements of x + * @param [in] beta host or device scalar + * @param [inout] y vector of length m if trans_a else n + * @param [in] incy stride between consecutive elements of y + * @param [in] stream + */ +template +void gemv(const raft::handle_t& handle, + const bool trans_a, + const int m, + const int n, + const math_t* alpha, + const math_t* A, + const int lda, + const math_t* x, + const int incx, + const math_t* beta, + math_t* y, + const int incy, + cudaStream_t stream) +{ + detail::gemv( + handle, trans_a, m, n, alpha, A, lda, x, incx, beta, y, incy, stream); +} + +template +void gemv(const raft::handle_t& handle, + const math_t* A, + const int n_rows, + const int n_cols, + const math_t* x, + const int incx, + math_t* y, + const int incy, + const bool trans_a, + const math_t alpha, + const math_t beta, + cudaStream_t stream) +{ + detail::gemv(handle, A, n_rows, n_cols, x, incx, y, incy, trans_a, alpha, beta, stream); +} + +/** + * y = alpha * op(A) * x + beta * y + * + * where + * + * @param handle raft handle + * @param A is a column-major matrix of size n_rows_a * n_cols_a. + * op(A) is either the transpose operation (trans_a == true) or identity. + * @param n_rows_a number of rows in A + * @param n_cols_a number of cols in A + * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. + * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. + * @param trans_a whether to take transpose of a + * @param alpha is a scalar scale of Ax. + * @param beta is a scalar scale of y. 
+ * @param stream stream on which this function is run + */ +template +void gemv(const raft::handle_t& handle, + const math_t* A, + const int n_rows_a, + const int n_cols_a, + const math_t* x, + math_t* y, + const bool trans_a, + const math_t alpha, + const math_t beta, + cudaStream_t stream) +{ + detail::gemv(handle, A, n_rows_a, n_cols_a, x, y, trans_a, alpha, beta, stream); +} + +/** + * y = op(A) * x + * + * where + * + * @param handle raft handle + * @param A is a column-major matrix of size n_rows_a * n_cols_a. + * op(A) is either the transpose operation (trans_a == true) or identity. + * @param n_rows_a number of rows in A + * @param n_cols_a number of cols in A + * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. + * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. + * @param trans_a whether to take transpose of a + * @param stream stream on which this function is run + */ +template +void gemv(const raft::handle_t& handle, + const math_t* A, + const int n_rows_a, + const int n_cols_a, + const math_t* x, + math_t* y, + const bool trans_a, + cudaStream_t stream) +{ + detail::gemv(handle, A, n_rows_a, n_cols_a, x, y, trans_a, stream); +} + +/** + * y = alpha * op(A) * x + beta * y + * + * where + * @param handle raft handle + * @param A is a column-major matrix of size n_rows_a * n_cols_a. + * op(A) is either the transpose operation (trans_a == true) or identity. + * @param n_rows_a number of rows in A + * @param n_cols_a number of cols in A + * @param lda is the leading dimension of A (number of rows); lda must be not smaller than n_rows_a. + * set it when you need to use only the first n_rows_a rows of the matrix A, which has + * (perhaps, due to padding) lda rows. + * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. + * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. + * @param trans_a whether to take transpose of a + * @param alpha is a scalar scale of Ax. + * @param beta is a scalar scale of y. + * @param stream stream on which this function is run + */ +template +void gemv(const raft::handle_t& handle, + const math_t* A, + const int n_rows_a, + const int n_cols_a, + const int lda, + const math_t* x, + math_t* y, + const bool trans_a, + const math_t alpha, + const math_t beta, + cudaStream_t stream) +{ + detail::gemv(handle, A, n_rows_a, n_cols_a, lda, x, y, trans_a, alpha, beta, stream); +} + +/** + * y = op(A) * x + * + * where + * @param handle raft handle + * @param A is a column-major matrix of size n_rows_a * n_cols_a. + * op(A) is either the transpose operation (trans_a == true) or identity. + * @param n_rows_a number of rows in A + * @param n_cols_a number of cols in A + * @param lda is the leading dimension of A (number of rows); lda must be not smaller than n_rows_a. + * set it when you need to use only the first n_rows_a rows of the matrix A, which has + * (perhaps, due to padding) lda rows. + * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. + * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. 
+ * @param trans_a whether to take transpose of a + * @param stream stream on which this function is run + * + */ +template +void gemv(const raft::handle_t& handle, + const math_t* A, + const int n_rows_a, + const int n_cols_a, + const int lda, + const math_t* x, + math_t* y, + const bool trans_a, + cudaStream_t stream) +{ + detail::gemv(handle, A, n_rows_a, n_cols_a, lda, x, y, trans_a, stream); +} + +}; // namespace linalg +}; // namespace raft +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.hpp b/cpp/include/raft/linalg/gemv.hpp index 2098027b16..d6e0e0326b 100644 --- a/cpp/include/raft/linalg/gemv.hpp +++ b/cpp/include/raft/linalg/gemv.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __GEMV_H +#define __GEMV_H #pragma once @@ -206,3 +213,5 @@ void gemv(const raft::handle_t& handle, }; // namespace linalg }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/init.cuh b/cpp/include/raft/linalg/init.cuh new file mode 100644 index 0000000000..2fdf9dceb9 --- /dev/null +++ b/cpp/include/raft/linalg/init.cuh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __INIT_H +#define __INIT_H + +#pragma once + +#include "detail/init.hpp" + +namespace raft { +namespace linalg { + +/** + * @brief Like Python range. + * + * Fills the output as out[i] = i. + * + * \param [out] out device array, size [end-start] + * \param [in] start of the range + * \param [in] end of range (exclusive) + * \param [in] stream cuda stream + */ +template +void range(T* out, int start, int end, cudaStream_t stream) +{ + detail::range(out, start, end, stream); +} + +/** + * @brief Like Python range. + * + * Fills the output as out[i] = i. + * + * \param [out] out device array, size [n] + * \param [in] n length of the array + * \param [in] stream cuda stream + */ +template +void range(T* out, int n, cudaStream_t stream) +{ + detail::range(out, n, stream); +} + +} // namespace linalg +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/init.hpp b/cpp/include/raft/linalg/init.hpp index bb577672e8..af3486f278 100644 --- a/cpp/include/raft/linalg/init.hpp +++ b/cpp/include/raft/linalg/init.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
+ */ + +#ifndef __INIT_H +#define __INIT_H #pragma once @@ -54,3 +61,5 @@ void range(T* out, int n, cudaStream_t stream) } // namespace linalg } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lanczos.cuh b/cpp/include/raft/linalg/lanczos.cuh new file mode 100644 index 0000000000..a7157adfab --- /dev/null +++ b/cpp/include/raft/linalg/lanczos.cuh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __LANCZOS_H +#define __LANCZOS_H + +#pragma once + +#include "detail/lanczos.cuh" +#include + +namespace raft { +namespace linalg { + +// ========================================================= +// Eigensolver +// ========================================================= + +/** + * @brief Compute smallest eigenvectors of symmetric matrix + * Computes eigenvalues and eigenvectors that are least + * positive. If matrix is positive definite or positive + * semidefinite, the computed eigenvalues are smallest in + * magnitude. + * The largest eigenvalue is estimated by performing several + * Lanczos iterations. An implicitly restarted Lanczos method is + * then applied to A+s*I, where s is negative the largest + * eigenvalue. + * @tparam index_type_t the type of data used for indexing. + * @tparam value_type_t the type of data used for weights, distances. + * @param handle the raft handle. + * @param A Matrix. + * @param nEigVecs Number of eigenvectors to compute. + * @param maxIter Maximum number of Lanczos steps. Does not include + * Lanczos steps used to estimate largest eigenvalue. + * @param restartIter Maximum size of Lanczos system before + * performing an implicit restart. Should be at least 4. + * @param tol Convergence tolerance. Lanczos iteration will + * terminate when the residual norm is less than tol*theta, where + * theta is an estimate for the smallest unwanted eigenvalue + * (i.e. the (nEigVecs+1)th smallest eigenvalue). + * @param reorthogonalize Whether to reorthogonalize Lanczos + * vectors. + * @param iter On exit, pointer to total number of Lanczos + * iterations performed. Does not include Lanczos steps used to + * estimate largest eigenvalue. + * @param eigVals_dev (Output, device memory, nEigVecs entries) + * Smallest eigenvalues of matrix. + * @param eigVecs_dev (Output, device memory, n*nEigVecs entries) + * Eigenvectors corresponding to smallest eigenvalues of + * matrix. Vectors are stored as columns of a column-major matrix + * with dimensions n x nEigVecs. + * @param seed random seed. + * @return error flag. 
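+ *
+ * Usage sketch (the matrix `A` and the device buffers are hypothetical, for
+ * illustration only):
+ * @code{.cpp}
+ *   int iter = 0;
+ *   raft::linalg::computeSmallestEigenvectors(
+ *     handle, A, 4, 100, 40, 1e-4f, false, iter, eig_vals_dev, eig_vecs_dev);
+ * @endcode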
+ */ +template +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) +{ + return detail::computeSmallestEigenvectors(handle, + A, + nEigVecs, + maxIter, + restartIter, + tol, + reorthogonalize, + iter, + eigVals_dev, + eigVecs_dev, + seed); +} + +/** + * @brief Compute largest eigenvectors of symmetric matrix + * Computes eigenvalues and eigenvectors that are least + * positive. If matrix is positive definite or positive + * semidefinite, the computed eigenvalues are largest in + * magnitude. + * The largest eigenvalue is estimated by performing several + * Lanczos iterations. An implicitly restarted Lanczos method is + * then applied to A+s*I, where s is negative the largest + * eigenvalue. + * @tparam index_type_t the type of data used for indexing. + * @tparam value_type_t the type of data used for weights, distances. + * @param handle the raft handle. + * @param A Matrix. + * @param nEigVecs Number of eigenvectors to compute. + * @param maxIter Maximum number of Lanczos steps. Does not include + * Lanczos steps used to estimate largest eigenvalue. + * @param restartIter Maximum size of Lanczos system before + * performing an implicit restart. Should be at least 4. + * @param tol Convergence tolerance. Lanczos iteration will + * terminate when the residual norm is less than tol*theta, where + * theta is an estimate for the largest unwanted eigenvalue + * (i.e. the (nEigVecs+1)th largest eigenvalue). + * @param reorthogonalize Whether to reorthogonalize Lanczos + * vectors. + * @param iter On exit, pointer to total number of Lanczos + * iterations performed. Does not include Lanczos steps used to + * estimate largest eigenvalue. + * @param eigVals_dev (Output, device memory, nEigVecs entries) + * Largest eigenvalues of matrix. + * @param eigVecs_dev (Output, device memory, n*nEigVecs entries) + * Eigenvectors corresponding to largest eigenvalues of + * matrix. Vectors are stored as columns of a column-major matrix + * with dimensions n x nEigVecs. + * @param seed random seed. + * @return error flag. + */ +template +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) +{ + return detail::computeLargestEigenvectors(handle, + A, + nEigVecs, + maxIter, + restartIter, + tol, + reorthogonalize, + iter, + eigVals_dev, + eigVecs_dev, + seed); +} + +} // namespace linalg +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lanczos.hpp b/cpp/include/raft/linalg/lanczos.hpp index 21b65158fc..7663af3cb2 100644 --- a/cpp/include/raft/linalg/lanczos.hpp +++ b/cpp/include/raft/linalg/lanczos.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
+ */ + +#ifndef __LANCZOS_H +#define __LANCZOS_H #pragma once -#include "detail/lanczos.hpp" +#include "detail/lanczos.cuh" #include namespace raft { @@ -156,3 +163,5 @@ int computeLargestEigenvectors( } // namespace linalg } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh new file mode 100644 index 0000000000..255f1293f4 --- /dev/null +++ b/cpp/include/raft/linalg/lstsq.cuh @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __LSTSQ_H +#define __LSTSQ_H + +#pragma once + +#include +#include +namespace raft { +namespace linalg { + +/** Solves the linear ordinary least squares problem `Aw = b` + * Via SVD decomposition of `A = U S Vt` using default cuSOLVER routine. + * + * @param[in] handle raft handle + * @param[inout] A input feature matrix. + * Warning: the content of this matrix is modified by the cuSOLVER routines. + * @param[in] n_rows number of rows in A + * @param[in] n_cols number of columns in A + * @param[inout] b input target vector. + * Warning: the content of this vector is modified by the cuSOLVER routines. + * @param[out] w output coefficient vector + * @param[in] stream cuda stream for ordering operations + */ +template +void lstsqSvdQR(const raft::handle_t& handle, + math_t* A, + const int n_rows, + const int n_cols, + const math_t* b, + math_t* w, + cudaStream_t stream) +{ + detail::lstsqSvdQR(handle, A, n_rows, n_cols, b, w, stream); +} + +/** Solves the linear ordinary least squares problem `Aw = b` + * Via SVD decomposition of `A = U S V^T` using Jacobi iterations (cuSOLVER). + * + * @param[in] handle raft handle + * @param[inout] A input feature matrix. + * Warning: the content of this matrix is modified by the cuSOLVER routines. + * @param[in] n_rows number of rows in A + * @param[in] n_cols number of columns in A + * @param[inout] b input target vector. + * Warning: the content of this vector is modified by the cuSOLVER routines. + * @param[out] w output coefficient vector + * @param[in] stream cuda stream for ordering operations + */ +template +void lstsqSvdJacobi(const raft::handle_t& handle, + math_t* A, + const int n_rows, + const int n_cols, + const math_t* b, + math_t* w, + cudaStream_t stream) +{ + detail::lstsqSvdJacobi(handle, A, n_rows, n_cols, b, w, stream); +} + +/** Solves the linear ordinary least squares problem `Aw = b` + * via eigenvalue decomposition of `A^T * A` (covariance matrix for dataset A). + * (`w = (A^T A)^-1 A^T b`) + */ +template +void lstsqEig(const raft::handle_t& handle, + const math_t* A, + const int n_rows, + const int n_cols, + const math_t* b, + math_t* w, + cudaStream_t stream) +{ + detail::lstsqEig(handle, A, n_rows, n_cols, b, w, stream); +} + +/** Solves the linear ordinary least squares problem `Aw = b` + * via QR decomposition of `A = QR`. 
+ * (triangular system of equations `Rw = Q^T b`) + * + * @param[in] handle raft handle + * @param[inout] A input feature matrix. + * Warning: the content of this matrix is modified by the cuSOLVER routines. + * @param[in] n_rows number of rows in A + * @param[in] n_cols number of columns in A + * @param[inout] b input target vector. + * Warning: the content of this vector is modified by the cuSOLVER routines. + * @param[out] w output coefficient vector + * @param[in] stream cuda stream for ordering operations + */ +template +void lstsqQR(const raft::handle_t& handle, + math_t* A, + const int n_rows, + const int n_cols, + math_t* b, + math_t* w, + cudaStream_t stream) +{ + detail::lstsqQR(handle, A, n_rows, n_cols, b, w, stream); +} + +}; // namespace linalg +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lstsq.hpp b/cpp/include/raft/linalg/lstsq.hpp index 57dd0a7b15..008fcab653 100644 --- a/cpp/include/raft/linalg/lstsq.hpp +++ b/cpp/include/raft/linalg/lstsq.hpp @@ -13,11 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __LSTSQ_H +#define __LSTSQ_H #pragma once #include -#include +#include namespace raft { namespace linalg { @@ -115,3 +122,5 @@ void lstsqQR(const raft::handle_t& handle, }; // namespace linalg }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/map.cuh b/cpp/include/raft/linalg/map.cuh new file mode 100644 index 0000000000..5df4d24b4f --- /dev/null +++ b/cpp/include/raft/linalg/map.cuh @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __MAP_H +#define __MAP_H + +#pragma once + +#include "detail/map.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief CUDA version of map + * @tparam InType data-type upon which the math operation will be performed + * @tparam MapOp the device-lambda performing the actual operation + * @tparam TPB threads-per-block in the final kernel launched + * @tparam Args additional parameters + * @tparam OutType data-type in which the result will be stored + * @param out the output of the map operation (assumed to be a device pointer) + * @param len number of elements in the input array + * @param map the device-lambda + * @param stream cuda-stream where to launch this kernel + * @param in the input array + * @param args additional input arrays + */ + +template +void map(OutType* out, size_t len, MapOp map, cudaStream_t stream, const InType* in, Args... 
args) +{ + detail::mapImpl(out, len, map, stream, in, args...); +} + +} // namespace linalg +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/map.hpp b/cpp/include/raft/linalg/map.hpp index febeaa8621..d4ee231eb1 100644 --- a/cpp/include/raft/linalg/map.hpp +++ b/cpp/include/raft/linalg/map.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MAP_H +#define __MAP_H #pragma once @@ -48,3 +55,5 @@ void map(OutType* out, size_t len, MapOp map, cudaStream_t stream, const InType* } // namespace linalg }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/map_then_reduce.cuh b/cpp/include/raft/linalg/map_then_reduce.cuh new file mode 100644 index 0000000000..36828cf154 --- /dev/null +++ b/cpp/include/raft/linalg/map_then_reduce.cuh @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __MAP_THEN_REDUCE_H +#define __MAP_THEN_REDUCE_H + +#pragma once + +#include "detail/map_then_reduce.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief CUDA version of map and then sum reduction operation + * @tparam Type data-type upon which the math operation will be performed + * @tparam MapOp the device-lambda performing the actual operation + * @tparam TPB threads-per-block in the final kernel launched + * @tparam Args additional parameters + * @param out the output sum-reduced value (assumed to be a device pointer) + * @param len number of elements in the input array + * @param map the device-lambda + * @param stream cuda-stream where to launch this kernel + * @param in the input array + * @param args additional input arrays + */ + +template +void mapThenSumReduce( + OutType* out, size_t len, MapOp map, cudaStream_t stream, const InType* in, Args... args) +{ + detail::mapThenReduceImpl( + out, len, (OutType)0, map, detail::sum_tag(), stream, in, args...); +} + +/** + * @brief CUDA version of map and then generic reduction operation + * @tparam Type data-type upon which the math operation will be performed + * @tparam MapOp the device-lambda performing the actual map operation + * @tparam ReduceLambda the device-lambda performing the actual reduction + * @tparam TPB threads-per-block in the final kernel launched + * @tparam Args additional parameters + * @param out the output reduced value (assumed to be a device pointer) + * @param len number of elements in the input array + * @param neutral The neutral element of the reduction operation. 
For example: + * 0 for sum, 1 for multiply, +Inf for Min, -Inf for Max + * @param map the device-lambda + * @param op the reduction device lambda + * @param stream cuda-stream where to launch this kernel + * @param in the input array + * @param args additional input arrays + */ + +template +void mapThenReduce(OutType* out, + size_t len, + OutType neutral, + MapOp map, + ReduceLambda op, + cudaStream_t stream, + const InType* in, + Args... args) +{ + detail::mapThenReduceImpl( + out, len, neutral, map, op, stream, in, args...); +} +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/map_then_reduce.hpp b/cpp/include/raft/linalg/map_then_reduce.hpp index 04275995a0..c4b136d1b8 100644 --- a/cpp/include/raft/linalg/map_then_reduce.hpp +++ b/cpp/include/raft/linalg/map_then_reduce.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MAP_THEN_REDUCE_H +#define __MAP_THEN_REDUCE_H #pragma once @@ -85,3 +92,5 @@ void mapThenReduce(OutType* out, } }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh new file mode 100644 index 0000000000..56437313e3 --- /dev/null +++ b/cpp/include/raft/linalg/matrix_vector_op.cuh @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __MATRIX_VECTOR_OP_H +#define __MATRIX_VECTOR_OP_H + +#pragma once + +#include "detail/matrix_vector_op.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief Operations for all the columns or rows with a given vector. + * Caution : Threads process multiple elements to speed up processing. These + * are loaded in a single read thanks to type promotion. Faster processing + * would thus only be enabled when adresses are optimally aligned for it. 
+ * Note : the function will also check that the size of the window of accesses + * is a multiple of the number of elements processed by a thread in order to + * enable faster processing + * @tparam Type the matrix/vector type + * @tparam Lambda a device function which represents a binary operator + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads per block of the cuda kernel launched + * @param out the output matrix (passing out = matrix makes it in-place) + * @param matrix the input matrix + * @param vec the vector + * @param D number of columns of matrix + * @param N number of rows of matrix + * @param rowMajor whether input is row or col major + * @param bcastAlongRows whether the broadcast of vector needs to happen along + * the rows of the matrix or columns + * @param op the mathematical operation + * @param stream cuda stream where to launch work + */ +template +void matrixVectorOp(Type* out, + const Type* matrix, + const Type* vec, + IdxType D, + IdxType N, + bool rowMajor, + bool bcastAlongRows, + Lambda op, + cudaStream_t stream) +{ + detail::matrixVectorOp(out, matrix, vec, D, N, rowMajor, bcastAlongRows, op, stream); +} + +/** + * @brief Operations for all the columns or rows with the given vectors. + * Caution : Threads process multiple elements to speed up processing. These + * are loaded in a single read thanks to type promotion. Faster processing + * would thus only be enabled when adresses are optimally aligned for it. + * Note : the function will also check that the size of the window of accesses + * is a multiple of the number of elements processed by a thread in order to + * enable faster processing + * @tparam Type the matrix/vector type + * @tparam Lambda a device function which represents a binary operator + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads per block of the cuda kernel launched + * @param out the output matrix (passing out = matrix makes it in-place) + * @param matrix the input matrix + * @param vec1 the first vector + * @param vec2 the second vector + * @param D number of columns of matrix + * @param N number of rows of matrix + * @param rowMajor whether input is row or col major + * @param bcastAlongRows whether the broadcast of vector needs to happen along + * the rows of the matrix or columns + * @param op the mathematical operation + * @param stream cuda stream where to launch work + */ +template +void matrixVectorOp(Type* out, + const Type* matrix, + const Type* vec1, + const Type* vec2, + IdxType D, + IdxType N, + bool rowMajor, + bool bcastAlongRows, + Lambda op, + cudaStream_t stream) +{ + detail::matrixVectorOp(out, matrix, vec1, vec2, D, N, rowMajor, bcastAlongRows, op, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.hpp b/cpp/include/raft/linalg/matrix_vector_op.hpp index b9790ebce2..c041d4c263 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.hpp +++ b/cpp/include/raft/linalg/matrix_vector_op.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
+ */ + +#ifndef __MATRIX_VECTOR_OP_H +#define __MATRIX_VECTOR_OP_H #pragma once @@ -99,3 +106,5 @@ void matrixVectorOp(Type* out, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh new file mode 100644 index 0000000000..1b3297f926 --- /dev/null +++ b/cpp/include/raft/linalg/mean_squared_error.cuh @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __MSE_H +#define __MSE_H + +#pragma once + +#include "detail/mean_squared_error.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief CUDA version mean squared error function mean((A-B)**2) + * @tparam math_t data-type upon which the math operation will be performed + * @tparam TPB threads-per-block + * @param out the output mean squared error value (assumed to be a device pointer) + * @param A input array (assumed to be a device pointer) + * @param B input array (assumed to be a device pointer) + * @param len number of elements in the input arrays + * @param weight weight to apply to every term in the mean squared error calculation + * @param stream cuda-stream where to launch this kernel + */ +template +void meanSquaredError( + math_t* out, const math_t* A, const math_t* B, size_t len, math_t weight, cudaStream_t stream) +{ + detail::meanSquaredError(out, A, B, len, weight, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/mean_squared_error.hpp b/cpp/include/raft/linalg/mean_squared_error.hpp index 42af8642b6..95428d47e0 100644 --- a/cpp/include/raft/linalg/mean_squared_error.hpp +++ b/cpp/include/raft/linalg/mean_squared_error.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MSE_H +#define __MSE_H #pragma once -#include "detail/mean_squared_error.hpp" +#include "detail/mean_squared_error.cuh" namespace raft { namespace linalg { @@ -41,3 +48,5 @@ void meanSquaredError( }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh new file mode 100644 index 0000000000..f1161b23cb --- /dev/null +++ b/cpp/include/raft/linalg/multiply.cuh @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __MULTIPLY_H +#define __MULTIPLY_H + +#pragma once + +#include "detail/multiply.cuh" + +namespace raft { +namespace linalg { + +/** + * @defgroup ScalarOps Scalar operations on the input buffer + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in the input buffer + * @param scalar the scalar used in the operations + * @param len number of elements in the input buffer + * @param stream cuda stream where to launch work + * @{ + */ +template +void multiplyScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream) +{ + detail::multiplyScalar(out, in, scalar, len, stream); +} +/** @} */ + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/multiply.hpp b/cpp/include/raft/linalg/multiply.hpp index 4a1628b44a..260fb25018 100644 --- a/cpp/include/raft/linalg/multiply.hpp +++ b/cpp/include/raft/linalg/multiply.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MULTIPLY_H +#define __MULTIPLY_H #pragma once -#include "detail/multiply.hpp" +#include "detail/multiply.cuh" namespace raft { namespace linalg { @@ -41,3 +48,5 @@ void multiplyScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, c }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/norm.cuh b/cpp/include/raft/linalg/norm.cuh new file mode 100644 index 0000000000..87bd2a2b0a --- /dev/null +++ b/cpp/include/raft/linalg/norm.cuh @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __NORM_H +#define __NORM_H + +#pragma once + +#include "detail/norm.cuh" + +namespace raft { +namespace linalg { + +/** different types of norms supported on the input buffers */ +using detail::L1Norm; +using detail::L2Norm; +using detail::NormType; + +/** + * @brief Compute row-wise norm of the input matrix and perform fin_op lambda + * + * Row-wise norm is useful while computing pairwise distance matrix, for + * example. + * This is used in many clustering algos like knn, kmeans, dbscan, etc... The + * current implementation is optimized only for bigger values of 'D'. 
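+ *
+ * A minimal usage sketch (hypothetical device buffers; accumulates the
+ * squared L2 norm of each row of a row-major N x D float matrix; pass a
+ * square-root fin_op to obtain the Euclidean norm):
+ * @code{.cpp}
+ * raft::linalg::rowNorm(norms, data, D, N, raft::linalg::L2Norm, true, stream);
+ * @endcode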
+ * + * @tparam Type the data type + * @tparam Lambda device final lambda + * @tparam IdxType Integer type used to for addressing + * @param dots the output vector of row-wise dot products + * @param data the input matrix (currently assumed to be row-major) + * @param D number of columns of data + * @param N number of rows of data + * @param type the type of norm to be applied + * @param rowMajor whether the input is row-major or not + * @param stream cuda stream where to launch work + * @param fin_op the final lambda op + */ +template > +void rowNorm(Type* dots, + const Type* data, + IdxType D, + IdxType N, + NormType type, + bool rowMajor, + cudaStream_t stream, + Lambda fin_op = raft::Nop()) +{ + detail::rowNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); +} + +/** + * @brief Compute column-wise norm of the input matrix and perform fin_op + * @tparam Type the data type + * @tparam Lambda device final lambda + * @tparam IdxType Integer type used to for addressing + * @param dots the output vector of column-wise dot products + * @param data the input matrix (currently assumed to be row-major) + * @param D number of columns of data + * @param N number of rows of data + * @param type the type of norm to be applied + * @param rowMajor whether the input is row-major or not + * @param stream cuda stream where to launch work + * @param fin_op the final lambda op + */ +template > +void colNorm(Type* dots, + const Type* data, + IdxType D, + IdxType N, + NormType type, + bool rowMajor, + cudaStream_t stream, + Lambda fin_op = raft::Nop()) +{ + detail::colNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/norm.hpp b/cpp/include/raft/linalg/norm.hpp index a6336769ca..7be524f6de 100644 --- a/cpp/include/raft/linalg/norm.hpp +++ b/cpp/include/raft/linalg/norm.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __NORM_H +#define __NORM_H #pragma once -#include "detail/norm.hpp" +#include "detail/norm.cuh" namespace raft { namespace linalg { @@ -88,3 +95,5 @@ void colNorm(Type* dots, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index d17fa9a043..f94fcfc894 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -13,12 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef __POWER_H +#define __POWER_H #pragma once #include -#include -#include +#include +#include namespace raft { namespace linalg { @@ -63,3 +65,5 @@ void power(math_t* out, const math_t* in1, const math_t* in2, IdxType len, cudaS }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.hpp b/cpp/include/raft/linalg/power.hpp new file mode 100644 index 0000000000..124ee8513a --- /dev/null +++ b/cpp/include/raft/linalg/power.hpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __POWER_H +#define __POWER_H + +#pragma once + +#include +#include +#include + +namespace raft { +namespace linalg { + +/** + * @defgroup ScalarOps Scalar operations on the input buffer + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in the input buffer + * @param scalar the scalar used in the operations + * @param len number of elements in the input buffer + * @param stream cuda stream where to launch work + * @{ + */ +template +void powerScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream) +{ + raft::linalg::unaryOp( + out, in, len, [scalar] __device__(math_t in) { return raft::myPow(in, scalar); }, stream); +} +/** @} */ + +/** + * @defgroup BinaryOps Element-wise binary operations on the input buffers + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in1 the first input buffer + * @param in2 the second input buffer + * @param len number of elements in the input buffers + * @param stream cuda stream where to launch work + * @{ + */ +template +void power(math_t* out, const math_t* in1, const math_t* in2, IdxType len, cudaStream_t stream) +{ + raft::linalg::binaryOp( + out, in1, in2, len, [] __device__(math_t a, math_t b) { return raft::myPow(a, b); }, stream); +} +/** @} */ + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/qr.cuh b/cpp/include/raft/linalg/qr.cuh new file mode 100644 index 0000000000..fe6a5263ca --- /dev/null +++ b/cpp/include/raft/linalg/qr.cuh @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __QR_H +#define __QR_H + +#pragma once + +#include "detail/qr.cuh" + +namespace raft { +namespace linalg { + +/** + * @defgroup QRdecomp QR decomposition + * @{ + */ + +/** + * @brief compute QR decomp and return only Q matrix + * @param handle: raft handle + * @param M: input matrix + * @param Q: Q matrix to be returned (on GPU) + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param stream cuda stream + * @{ + */ +template +void qrGetQ(const raft::handle_t& handle, + const math_t* M, + math_t* Q, + int n_rows, + int n_cols, + cudaStream_t stream) +{ + detail::qrGetQ(handle, M, Q, n_rows, n_cols, stream); +} + +/** + * @brief compute QR decomp and return both Q and R matrices + * @param handle: raft handle + * @param M: input matrix + * @param Q: Q matrix to be returned (on GPU) + * @param R: R matrix to be returned (on GPU) + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param stream cuda stream + */ +template +void qrGetQR(const raft::handle_t& handle, + math_t* M, + math_t* Q, + math_t* R, + int n_rows, + int n_cols, + cudaStream_t stream) +{ + detail::qrGetQR(handle, M, Q, R, n_rows, n_cols, stream); +} +/** @} */ + +}; // namespace linalg +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/qr.hpp b/cpp/include/raft/linalg/qr.hpp index 50e97e4069..da8736b46f 100644 --- a/cpp/include/raft/linalg/qr.hpp +++ b/cpp/include/raft/linalg/qr.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __QR_H +#define __QR_H #pragma once @@ -72,3 +79,5 @@ void qrGetQR(const raft::handle_t& handle, }; // namespace linalg }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh new file mode 100644 index 0000000000..7640da8c2d --- /dev/null +++ b/cpp/include/raft/linalg/reduce.cuh @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __REDUCE_H +#define __REDUCE_H + +#pragma once + +#include "detail/reduce.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief Compute reduction of the input matrix along the requested dimension + * + * @tparam InType the data type of the input + * @tparam OutType the data type of the output (as well as the data type for + * which reduction is performed) + * @tparam IdxType data type of the indices of the array + * @tparam MainLambda Unary lambda applied while acculumation (eg: L1 or L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*MainLambda)(InType, IdxType);
+ * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*ReduceLambda)(OutType, OutType);
+ * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*FinalLambda)(OutType);
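+ *
+ * A minimal usage sketch (hypothetical device buffers; computes row-wise
+ * Euclidean norms of a row-major N x D float matrix by squaring in main_op,
+ * adding in reduce_op and taking the square root in final_op):
+ * @code{.cpp}
+ * raft::linalg::reduce(
+ *   norms, data, D, N, 0.f, true, true, stream, false,
+ *   [] __device__(float x, int) { return x * x; },
+ *   raft::Sum<float>(),
+ *   [] __device__(float x) { return raft::mySqrt(x); });
+ * @endcode
+ *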
+ * @param dots the output reduction vector
+ * @param data the input matrix
+ * @param D number of columns
+ * @param N number of rows
+ * @param init initial value to use for the reduction
+ * @param rowMajor input matrix is row-major or not
+ * @param alongRows whether to reduce along rows or columns
+ * @param stream cuda stream where to launch work
+ * @param inplace reduction result added inplace or overwrites old values?
+ * @param main_op elementwise operation to apply before reduction
+ * @param reduce_op binary reduction operation
+ * @param final_op elementwise operation to apply before storing results
+ */
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::Nop<InType, IdxType>,
+          typename ReduceLambda = raft::Sum<OutType>,
+          typename FinalLambda  = raft::Nop<OutType>>
+void reduce(OutType* dots,
+            const InType* data,
+            int D,
+            int N,
+            OutType init,
+            bool rowMajor,
+            bool alongRows,
+            cudaStream_t stream,
+            bool inplace           = false,
+            MainLambda main_op     = raft::Nop<InType, IdxType>(),
+            ReduceLambda reduce_op = raft::Sum<OutType>(),
+            FinalLambda final_op   = raft::Nop<OutType>())
+{
+  detail::reduce(
+    dots, data, D, N, init, rowMajor, alongRows, stream, inplace, main_op, reduce_op, final_op);
+}
+
+}; // end namespace linalg
+}; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce.hpp b/cpp/include/raft/linalg/reduce.hpp
index 1c4ef70df8..b9f057771a 100644
--- a/cpp/include/raft/linalg/reduce.hpp
+++ b/cpp/include/raft/linalg/reduce.hpp
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __REDUCE_H
+#define __REDUCE_H
 
 #pragma once
 
-#include "detail/reduce.hpp"
+#include "detail/reduce.cuh"
 
 namespace raft {
 namespace linalg {
@@ -75,3 +82,5 @@ void reduce(OutType* dots,
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
index 82d272671c..2336639258 100644
--- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
@@ -13,6 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+#ifndef __REDUCE_COLS_BY_KEY
+#define __REDUCE_COLS_BY_KEY
 
 #pragma once
 
@@ -52,3 +54,5 @@ void reduce_cols_by_key(const T* data,
 }
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.hpp b/cpp/include/raft/linalg/reduce_cols_by_key.hpp
new file mode 100644
index 0000000000..a338d8572b
--- /dev/null
+++ b/cpp/include/raft/linalg/reduce_cols_by_key.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __REDUCE_COLS_BY_KEY
+#define __REDUCE_COLS_BY_KEY
+
+#pragma once
+
+#include <raft/linalg/detail/reduce_cols_by_key.cuh>
+
+namespace raft {
+namespace linalg {
+
+/**
+ * @brief Computes the sum-reduction of matrix columns for each given key
+ * @tparam T the input data type (as well as the output reduced matrix)
+ * @tparam KeyIteratorT random-access iterator type for reading the keys (may be a simple pointer)
+ * @tparam IdxType indexing arithmetic type
+ * @param data the input data (dim = nrows x ncols). This is assumed to be in
+ * row-major layout
+ * @param keys keys array (len = ncols). It is assumed that each key in this
+ * array is between [0, nkeys). In case this is not true, the caller is expected
+ * to have called make_monotonic primitive to prepare such a contiguous and
+ * monotonically increasing keys array.
+ * @param out the output reduced matrix along columns (dim = nrows x nkeys).
+ * This will be assumed to be in row-major layout
+ * @param nrows number of rows in the input data
+ * @param ncols number of columns in the input data
+ * @param nkeys number of unique keys in the keys array
+ * @param stream cuda stream to launch the kernel onto
+ */
+template <typename T, typename KeyIteratorT, typename IdxType = int>
+void reduce_cols_by_key(const T* data,
+                        const KeyIteratorT keys,
+                        T* out,
+                        IdxType nrows,
+                        IdxType ncols,
+                        IdxType nkeys,
+                        cudaStream_t stream)
+{
+  detail::reduce_cols_by_key(data, keys, out, nrows, ncols, nkeys, stream);
+}
+}; // end namespace linalg
+}; // end namespace raft
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
index 76d4ed4971..ca7a956986 100644
--- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+#ifndef __REDUCE_ROWS_BY_KEY
+#define __REDUCE_ROWS_BY_KEY
 
 #pragma once
 
@@ -108,3 +110,5 @@ void reduce_rows_by_key(const DataIteratorT d_A,
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.hpp b/cpp/include/raft/linalg/reduce_rows_by_key.hpp
new file mode 100644
index 0000000000..70ce9eaa4f
--- /dev/null
+++ b/cpp/include/raft/linalg/reduce_rows_by_key.hpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */ + +#ifndef __REDUCE_ROWS_BY_KEY +#define __REDUCE_ROWS_BY_KEY + +#pragma once + +#include + +namespace raft { +namespace linalg { + +/** + Small helper function to convert from int->char and char->int + Transform ncols*nrows read of int in 2*nrows reads of int + ncols*rows reads of chars +**/ +template +void convert_array(IteratorT1 dst, IteratorT2 src, int n, cudaStream_t st) +{ + detail::convert_array(dst, src, n, st); +} + +/** + * @brief Computes the weighted reduction of matrix rows for each given key + * + * @tparam DataIteratorT Random-access iterator type, for reading input matrix + * (may be a simple pointer type) + * @tparam KeysIteratorT Random-access iterator type, for reading input keys + * (may be a simple pointer type) + * + * @param[in] d_A Input data array (lda x nrows) + * @param[in] lda Real row size for input data, d_A + * @param[in] d_keys Keys for each row (1 x nrows) + * @param[in] d_weights Weights for each observation in d_A (1 x nrows) + * @param[out] d_keys_char Scratch memory for conversion of keys to char + * @param[in] nrows Number of rows in d_A and d_keys + * @param[in] ncols Number of data columns in d_A + * @param[in] nkeys Number of unique keys in d_keys + * @param[out] d_sums Row sums by key (ncols x d_keys) + * @param[in] stream CUDA stream + */ +template +void reduce_rows_by_key(const DataIteratorT d_A, + int lda, + const KeysIteratorT d_keys, + const WeightT* d_weights, + char* d_keys_char, + int nrows, + int ncols, + int nkeys, + DataIteratorT d_sums, + cudaStream_t stream) +{ + detail::reduce_rows_by_key( + d_A, lda, d_keys, d_weights, d_keys_char, nrows, ncols, nkeys, d_sums, stream); +} + +/** + * @brief Computes the reduction of matrix rows for each given key + * @tparam DataIteratorT Random-access iterator type, for reading input matrix (may be a simple + * pointer type) + * @tparam KeysIteratorT Random-access iterator type, for reading input keys (may be a simple + * pointer type) + * @param[in] d_A Input data array (lda x nrows) + * @param[in] lda Real row size for input data, d_A + * @param[in] d_keys Keys for each row (1 x nrows) + * @param d_keys_char Scratch memory for conversion of keys to char + * @param[in] nrows Number of rows in d_A and d_keys + * @param[in] ncols Number of data columns in d_A + * @param[in] nkeys Number of unique keys in d_keys + * @param[out] d_sums Row sums by key (ncols x d_keys) + * @param[in] stream CUDA stream + */ +template +void reduce_rows_by_key(const DataIteratorT d_A, + int lda, + const KeysIteratorT d_keys, + char* d_keys_char, + int nrows, + int ncols, + int nkeys, + DataIteratorT d_sums, + cudaStream_t stream) +{ + typedef typename std::iterator_traits::value_type DataType; + reduce_rows_by_key(d_A, + lda, + d_keys, + static_cast(nullptr), + d_keys_char, + nrows, + ncols, + nkeys, + d_sums, + stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh index d1d739489f..f5eaba7526 100644 --- a/cpp/include/raft/linalg/rsvd.cuh +++ b/cpp/include/raft/linalg/rsvd.cuh @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#ifndef __RSVD_H +#define __RSVD_H #pragma once @@ -137,3 +139,5 @@ void rsvdPerc(const raft::handle_t& handle, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.hpp b/cpp/include/raft/linalg/rsvd.hpp new file mode 100644 index 0000000000..2dd5faa332 --- /dev/null +++ b/cpp/include/raft/linalg/rsvd.hpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __RSVD_H +#define __RSVD_H + +#pragma once + +#include + +namespace raft { +namespace linalg { + +/** + * @brief randomized singular value decomposition (RSVD) on the column major + * float type input matrix (Jacobi-based), by specifying no. of PCs and + * upsamples directly + * @param handle: raft handle + * @param M: input matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param S_vec: singular values of input matrix + * @param U: left singular values of input matrix + * @param V: right singular values of input matrix + * @param k: no. of singular values to be computed + * @param p: no. of upsamples + * @param use_bbt: whether use eigen decomposition in computation or not + * @param gen_left_vec: left vector needs to be generated or not? + * @param gen_right_vec: right vector needs to be generated or not? + * @param use_jacobi: whether to jacobi solver for decomposition + * @param tol: tolerance for Jacobi-based solvers + * @param max_sweeps: maximum number of sweeps for Jacobi-based solvers + * @param stream cuda stream + */ +template +void rsvdFixedRank(const raft::handle_t& handle, + math_t* M, + int n_rows, + int n_cols, + math_t* S_vec, + math_t* U, + math_t* V, + int k, + int p, + bool use_bbt, + bool gen_left_vec, + bool gen_right_vec, + bool use_jacobi, + math_t tol, + int max_sweeps, + cudaStream_t stream) +{ + detail::rsvdFixedRank(handle, + M, + n_rows, + n_cols, + S_vec, + U, + V, + k, + p, + use_bbt, + gen_left_vec, + gen_right_vec, + use_jacobi, + tol, + max_sweeps, + stream); +} + +/** + * @brief randomized singular value decomposition (RSVD) on the column major + * float type input matrix (Jacobi-based), by specifying the PC and upsampling + * ratio + * @param handle: raft handle + * @param M: input matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param S_vec: singular values of input matrix + * @param U: left singular values of input matrix + * @param V: right singular values of input matrix + * @param PC_perc: percentage of singular values to be computed + * @param UpS_perc: upsampling percentage + * @param use_bbt: whether use eigen decomposition in computation or not + * @param gen_left_vec: left vector needs to be generated or not? + * @param gen_right_vec: right vector needs to be generated or not? 
+ * @param use_jacobi: whether to jacobi solver for decomposition + * @param tol: tolerance for Jacobi-based solvers + * @param max_sweeps: maximum number of sweeps for Jacobi-based solvers + * @param stream cuda stream + */ +template +void rsvdPerc(const raft::handle_t& handle, + math_t* M, + int n_rows, + int n_cols, + math_t* S_vec, + math_t* U, + math_t* V, + math_t PC_perc, + math_t UpS_perc, + bool use_bbt, + bool gen_left_vec, + bool gen_right_vec, + bool use_jacobi, + math_t tol, + int max_sweeps, + cudaStream_t stream) +{ + detail::rsvdPerc(handle, + M, + n_rows, + n_cols, + S_vec, + U, + V, + PC_perc, + UpS_perc, + use_bbt, + gen_left_vec, + gen_right_vec, + use_jacobi, + tol, + max_sweeps, + stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index c431cfdcc0..b58bc752ac 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -13,11 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef __SQRT_H +#define __SQRT_H #pragma once #include -#include +#include namespace raft { namespace linalg { @@ -42,3 +44,5 @@ void sqrt(math_t* out, const math_t* in, IdxType len, cudaStream_t stream) }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.hpp b/cpp/include/raft/linalg/sqrt.hpp new file mode 100644 index 0000000000..9856173248 --- /dev/null +++ b/cpp/include/raft/linalg/sqrt.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SQRT_H +#define __SQRT_H + +#pragma once + +#include +#include + +namespace raft { +namespace linalg { + +/** + * @defgroup ScalarOps Scalar operations on the input buffer + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out the output buffer + * @param in the input buffer + * @param len number of elements in the input buffer + * @param stream cuda stream where to launch work + * @{ + */ +template +void sqrt(math_t* out, const math_t* in, IdxType len, cudaStream_t stream) +{ + raft::linalg::unaryOp( + out, in, len, [] __device__(math_t in) { return raft::mySqrt(in); }, stream); +} +/** @} */ + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh new file mode 100644 index 0000000000..941e64dcb1 --- /dev/null +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __STRIDED_REDUCTION_H
+#define __STRIDED_REDUCTION_H
+
+#pragma once
+
+#include "detail/strided_reduction.cuh"
+
+namespace raft {
+namespace linalg {
+
+/**
+ * @brief Compute reduction of the input matrix along the strided dimension
+ *
+ * @tparam InType the data type of the input
+ * @tparam OutType the data type of the output (as well as the data type for
+ * which reduction is performed)
+ * @tparam IdxType data type of the indices of the array
+ * @tparam MainLambda Unary lambda applied during accumulation (eg: L1 or L2 norm)
+ * It must be a 'callable' supporting the following input and output:
+ *
OutType (*MainLambda)(InType, IdxType);
+ * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*ReduceLambda)(OutType, OutType);
+ * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) + * It must be a 'callable' supporting the following input and output: + *
OutType (*FinalLambda)(OutType);
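+ *
+ * A minimal usage sketch (hypothetical device buffers; with the default
+ * no-op/add lambdas this produces one sum per column of a row-major
+ * N x D float matrix):
+ * @code{.cpp}
+ * raft::linalg::stridedReduction(col_sums, data, D, N, 0.f, stream);
+ * @endcode
+ *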
+ * @param dots the output reduction vector
+ * @param data the input matrix
+ * @param D leading dimension of data
+ * @param N second dimension of data
+ * @param init initial value to use for the reduction
+ * @param main_op elementwise operation to apply before reduction
+ * @param reduce_op binary reduction operation
+ * @param final_op elementwise operation to apply before storing results
+ * @param inplace reduction result added inplace or overwrites old values?
+ * @param stream cuda stream where to launch work
+ */
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::Nop<InType, IdxType>,
+          typename ReduceLambda = raft::Sum<OutType>,
+          typename FinalLambda  = raft::Nop<OutType>>
+void stridedReduction(OutType* dots,
+                      const InType* data,
+                      IdxType D,
+                      IdxType N,
+                      OutType init,
+                      cudaStream_t stream,
+                      bool inplace           = false,
+                      MainLambda main_op     = raft::Nop<InType, IdxType>(),
+                      ReduceLambda reduce_op = raft::Sum<OutType>(),
+                      FinalLambda final_op   = raft::Nop<OutType>())
+{
+  detail::stridedReduction(dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+}
+
+}; // end namespace linalg
+}; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/strided_reduction.hpp b/cpp/include/raft/linalg/strided_reduction.hpp
index 0f97323e5a..534f8edcf7 100644
--- a/cpp/include/raft/linalg/strided_reduction.hpp
+++ b/cpp/include/raft/linalg/strided_reduction.hpp
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __STRIDED_REDUCTION_H
+#define __STRIDED_REDUCTION_H
 
 #pragma once
 
@@ -70,3 +77,5 @@ void stridedReduction(OutType* dots,
 
 }; // end namespace linalg
 }; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh
new file mode 100644
index 0000000000..9ca36ddddf
--- /dev/null
+++ b/cpp/include/raft/linalg/subtract.cuh
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SUBTRACT_H
+#define __SUBTRACT_H
+
+#pragma once
+
+#include "detail/subtract.cuh"
+
+namespace raft {
+namespace linalg {
+
+/**
+ * @brief Elementwise scalar subtraction operation on the input buffer
+ *
+ * @tparam InT input data-type. Also the data-type upon which the math ops
+ * will be performed
+ * @tparam OutT output data-type
+ * @tparam IdxType Integer type used for addressing
+ *
+ * @param out the output buffer
+ * @param in the input buffer
+ * @param scalar the scalar used in the operations
+ * @param len number of elements in the input buffer
+ * @param stream cuda stream where to launch work
+ */
+template <typename InT, typename OutT = InT, typename IdxType = int>
+void subtractScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream)
+{
+  detail::subtractScalar(out, in, scalar, len, stream);
+}
+
+/**
+ * @brief Elementwise subtraction operation on the input buffers
+ * @tparam InT input data-type.
Also the data-type upon which the math ops + * will be performed + * @tparam OutT output data-type + * @tparam IdxType Integer type used to for addressing + * + * @param out the output buffer + * @param in1 the first input buffer + * @param in2 the second input buffer + * @param len number of elements in the input buffers + * @param stream cuda stream where to launch work + */ +template +void subtract(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) +{ + detail::subtract(out, in1, in2, len, stream); +} + +/** Substract single value pointed by singleScalarDev parameter in device memory from inDev[i] and + * write result to outDev[i] + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param outDev the output buffer + * @param inDev the input buffer + * @param singleScalarDev pointer to the scalar located in device memory + * @param len number of elements in the input and output buffer + * @param stream cuda stream + * @remark block size has not been tuned + */ +template +void subtractDevScalar(math_t* outDev, + const math_t* inDev, + const math_t* singleScalarDev, + IdxType len, + cudaStream_t stream) +{ + detail::subtractDevScalar(outDev, inDev, singleScalarDev, len, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/subtract.hpp b/cpp/include/raft/linalg/subtract.hpp index 9d48948cad..2420ce69e2 100644 --- a/cpp/include/raft/linalg/subtract.hpp +++ b/cpp/include/raft/linalg/subtract.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SUBTRACT_H +#define __SUBTRACT_H #pragma once @@ -83,3 +90,5 @@ void subtractDevScalar(math_t* outDev, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh new file mode 100644 index 0000000000..b48def90a3 --- /dev/null +++ b/cpp/include/raft/linalg/svd.cuh @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __SVD_H +#define __SVD_H + +#pragma once + +#include "detail/svd.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief singular value decomposition (SVD) on the column major float type + * input matrix using QR method + * @param handle: raft handle + * @param in: input matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param sing_vals: singular values of input matrix + * @param left_sing_vecs: left singular values of input matrix + * @param right_sing_vecs: right singular values of input matrix + * @param trans_right: transpose right vectors or not + * @param gen_left_vec: generate left eig vector. Not activated. + * @param gen_right_vec: generate right eig vector. Not activated. + * @param stream cuda stream + */ +// TODO: activate gen_left_vec and gen_right_vec options +// TODO: couldn't template this function due to cusolverDnSgesvd and +// cusolverSnSgesvd. Check if there is any other way. +template +void svdQR(const raft::handle_t& handle, + T* in, + int n_rows, + int n_cols, + T* sing_vals, + T* left_sing_vecs, + T* right_sing_vecs, + bool trans_right, + bool gen_left_vec, + bool gen_right_vec, + cudaStream_t stream) +{ + detail::svdQR(handle, + in, + n_rows, + n_cols, + sing_vals, + left_sing_vecs, + right_sing_vecs, + trans_right, + gen_left_vec, + gen_right_vec, + stream); +} + +template +void svdEig(const raft::handle_t& handle, + T* in, + int n_rows, + int n_cols, + T* S, + T* U, + T* V, + bool gen_left_vec, + cudaStream_t stream) +{ + detail::svdEig(handle, in, n_rows, n_cols, S, U, V, gen_left_vec, stream); +} + +/** + * @brief on the column major input matrix using Jacobi method + * @param handle: raft handle + * @param in: input matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param sing_vals: singular values of input matrix + * @param left_sing_vecs: left singular vectors of input matrix + * @param right_sing_vecs: right singular vectors of input matrix + * @param gen_left_vec: generate left eig vector. Not activated. + * @param gen_right_vec: generate right eig vector. Not activated. + * @param tol: error tolerance for the jacobi method. Algorithm stops when the + * error is below tol + * @param max_sweeps: number of sweeps in the Jacobi algorithm. The more the better + * accuracy. 
+ * @param stream cuda stream + */ +template +void svdJacobi(const raft::handle_t& handle, + math_t* in, + int n_rows, + int n_cols, + math_t* sing_vals, + math_t* left_sing_vecs, + math_t* right_sing_vecs, + bool gen_left_vec, + bool gen_right_vec, + math_t tol, + int max_sweeps, + cudaStream_t stream) +{ + detail::svdJacobi(handle, + in, + n_rows, + n_cols, + sing_vals, + left_sing_vecs, + right_sing_vecs, + gen_left_vec, + gen_right_vec, + tol, + max_sweeps, + stream); +} + +/** + * @brief reconstruct a matrix use left and right singular vectors and + * singular values + * @param handle: raft handle + * @param U: left singular vectors of size n_rows x k + * @param S: square matrix with singular values on its diagonal, k x k + * @param V: right singular vectors of size n_cols x k + * @param out: reconstructed matrix to be returned + * @param n_rows: number rows of output matrix + * @param n_cols: number columns of output matrix + * @param k: number of singular values + * @param stream cuda stream + */ +template +void svdReconstruction(const raft::handle_t& handle, + math_t* U, + math_t* S, + math_t* V, + math_t* out, + int n_rows, + int n_cols, + int k, + cudaStream_t stream) +{ + detail::svdReconstruction(handle, U, S, V, out, n_rows, n_cols, k, stream); +} + +/** + * @brief reconstruct a matrix use left and right singular vectors and + * singular values + * @param handle: raft handle + * @param A_d: input matrix + * @param U: left singular vectors of size n_rows x k + * @param S_vec: singular values as a vector + * @param V: right singular vectors of size n_cols x k + * @param n_rows: number rows of output matrix + * @param n_cols: number columns of output matrix + * @param k: number of singular values to be computed, 1.0 for normal SVD + * @param tol: tolerance for the evaluation + * @param stream cuda stream + */ +template +bool evaluateSVDByL2Norm(const raft::handle_t& handle, + math_t* A_d, + math_t* U, + math_t* S_vec, + math_t* V, + int n_rows, + int n_cols, + int k, + math_t tol, + cudaStream_t stream) +{ + return detail::evaluateSVDByL2Norm(handle, A_d, U, S_vec, V, n_rows, n_cols, k, tol, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.hpp b/cpp/include/raft/linalg/svd.hpp index a30180b174..765f364d5b 100644 --- a/cpp/include/raft/linalg/svd.hpp +++ b/cpp/include/raft/linalg/svd.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SVD_H +#define __SVD_H #pragma once -#include "detail/svd.hpp" +#include "detail/svd.cuh" namespace raft { namespace linalg { @@ -182,3 +189,5 @@ bool evaluateSVDByL2Norm(const raft::handle_t& handle, }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh index be411e6492..158cca168d 100644 --- a/cpp/include/raft/linalg/ternary_op.cuh +++ b/cpp/include/raft/linalg/ternary_op.cuh @@ -14,6 +14,9 @@ * limitations under the License. 
*/ +#ifndef __TERNARY_OP_H +#define __TERNARY_OP_H + #pragma once #include @@ -47,4 +50,6 @@ void ternaryOp(math_t* out, } }; // end namespace linalg -}; // end namespace raft \ No newline at end of file +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.hpp b/cpp/include/raft/linalg/ternary_op.hpp new file mode 100644 index 0000000000..1e8892211c --- /dev/null +++ b/cpp/include/raft/linalg/ternary_op.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __TERNARY_OP_H +#define __TERNARY_OP_H + +#pragma once + +#include + +namespace raft { +namespace linalg { +/** + * @brief perform element-wise ternary operation on the input arrays + * @tparam math_t data-type upon which the math operation will be performed + * @tparam Lambda the device-lambda performing the actual operation + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads-per-block in the final kernel launched + * @param out the output array + * @param in1 the first input array + * @param in2 the second input array + * @param in3 the third input array + * @param len number of elements in the input array + * @param op the device-lambda + * @param stream cuda stream where to launch work + */ +template +void ternaryOp(math_t* out, + const math_t* in1, + const math_t* in2, + const math_t* in3, + IdxType len, + Lambda op, + cudaStream_t stream) +{ + detail::ternaryOp(out, in1, in2, in3, len, op, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh new file mode 100644 index 0000000000..a9ada5125a --- /dev/null +++ b/cpp/include/raft/linalg/transpose.cuh @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __TRANSPOSE_H +#define __TRANSPOSE_H + +#pragma once + +#include "detail/transpose.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief transpose on the column major input matrix using Jacobi method + * @param handle: raft handle + * @param in: input matrix + * @param out: output. 
Transposed input matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param stream: cuda stream + */ +template +void transpose(const raft::handle_t& handle, + math_t* in, + math_t* out, + int n_rows, + int n_cols, + cudaStream_t stream) +{ + detail::transpose(handle, in, out, n_rows, n_cols, stream); +} + +/** + * @brief transpose on the column major input matrix using Jacobi method + * @param inout: input and output matrix + * @param n: number of rows and columns of input matrix + * @param stream: cuda stream + */ +template +void transpose(math_t* inout, int n, cudaStream_t stream) +{ + detail::transpose(inout, n, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/transpose.hpp b/cpp/include/raft/linalg/transpose.hpp index 50608877fa..765d523b16 100644 --- a/cpp/include/raft/linalg/transpose.hpp +++ b/cpp/include/raft/linalg/transpose.hpp @@ -13,10 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __TRANSPOSE_H +#define __TRANSPOSE_H #pragma once -#include "detail/transpose.hpp" +#include "detail/transpose.cuh" namespace raft { namespace linalg { @@ -55,3 +62,5 @@ void transpose(math_t* inout, int n, cudaStream_t stream) }; // end namespace linalg }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh new file mode 100644 index 0000000000..f2466df463 --- /dev/null +++ b/cpp/include/raft/linalg/unary_op.cuh @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __UNARY_OP_H +#define __UNARY_OP_H + +#pragma once + +#include "detail/unary_op.cuh" + +namespace raft { +namespace linalg { + +/** + * @brief perform element-wise unary operation in the input array + * @tparam InType input data-type + * @tparam Lambda the device-lambda performing the actual operation + * @tparam OutType output data-type + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads-per-block in the final kernel launched + * @param out the output array + * @param in the input array + * @param len number of elements in the input array + * @param op the device-lambda + * @param stream cuda stream where to launch work + * @note Lambda must be a functor with the following signature: + * `OutType func(const InType& val);` + */ +template +void unaryOp(OutType* out, const InType* in, IdxType len, Lambda op, cudaStream_t stream) +{ + detail::unaryOpCaller(out, in, len, op, stream); +} + +/** + * @brief Perform an element-wise unary operation into the output array + * + * Compared to `unaryOp()`, this method does not do any reads from any inputs + * + * @tparam OutType output data-type + * @tparam Lambda the device-lambda performing the actual operation + * @tparam IdxType Integer type used to for addressing + * @tparam TPB threads-per-block in the final kernel launched + * + * @param[out] out the output array [on device] [len = len] + * @param[in] len number of elements in the input array + * @param[in] op the device-lambda which must be of the form: + * `void func(OutType* outLocationOffset, IdxType idx);` + * where outLocationOffset will be out + idx. + * @param[in] stream cuda stream where to launch work + */ +template +void writeOnlyUnaryOp(OutType* out, IdxType len, Lambda op, cudaStream_t stream) +{ + detail::writeOnlyUnaryOpCaller(out, len, op, stream); +} + +}; // end namespace linalg +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/unary_op.hpp b/cpp/include/raft/linalg/unary_op.hpp index 51faa2e4a4..12d841340b 100644 --- a/cpp/include/raft/linalg/unary_op.hpp +++ b/cpp/include/raft/linalg/unary_op.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __UNARY_OP_H +#define __UNARY_OP_H #pragma once @@ -71,3 +78,5 @@ void writeOnlyUnaryOp(OutType* out, IdxType len, Lambda op, cudaStream_t stream) }; // end namespace linalg }; // end namespace raft + +#endif diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh new file mode 100644 index 0000000000..afdec24ebd --- /dev/null +++ b/cpp/include/raft/matrix/col_wise_sort.cuh @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __COL_WISE_SORT_H +#define __COL_WISE_SORT_H + +#pragma once + +#include + +namespace raft { +namespace matrix { + +/** + * @brief sort columns within each row of row-major input matrix and return sorted indexes + * modelled as key-value sort with key being input matrix and value being index of values + * @param in: input matrix + * @param out: output value(index) matrix + * @param n_rows: number rows of input matrix + * @param n_columns: number columns of input matrix + * @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize + * @param workspacePtr: pointer to workspace memory + * @param workspaceSize: Size of workspace to be allocated + * @param stream: cuda stream to execute prim on + * @param sortedKeys: Optional, output matrix for sorted keys (input) + */ +template +void sort_cols_per_row(const InType* in, + OutType* out, + int n_rows, + int n_columns, + bool& bAllocWorkspace, + void* workspacePtr, + size_t& workspaceSize, + cudaStream_t stream, + InType* sortedKeys = nullptr) +{ + detail::sortColumnsPerRow( + in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, stream, sortedKeys); +} +}; // end namespace matrix +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp index 7ace5881bc..f259bc71a8 100644 --- a/cpp/include/raft/matrix/col_wise_sort.hpp +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __COL_WISE_SORT_H +#define __COL_WISE_SORT_H #pragma once @@ -50,3 +57,5 @@ void sort_cols_per_row(const InType* in, } }; // end namespace matrix }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh index 6b32cbc06e..9e996e19d9 100644 --- a/cpp/include/raft/matrix/detail/math.cuh +++ b/cpp/include/raft/matrix/detail/math.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/raft/matrix/detail/matrix.cuh b/cpp/include/raft/matrix/detail/matrix.cuh index f057ba283c..3fa602d865 100644 --- a/cpp/include/raft/matrix/detail/matrix.cuh +++ b/cpp/include/raft/matrix/detail/matrix.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/matrix/math.cuh b/cpp/include/raft/matrix/math.cuh new file mode 100644 index 0000000000..9e103afda5 --- /dev/null +++ b/cpp/include/raft/matrix/math.cuh @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
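One plausible two-phase calling pattern implied by the `bAllocWorkspace`/`workspaceSize` parameters of `sort_cols_per_row` (hypothetical buffer names; not the only valid flow):

  bool need_ws  = false;
  size_t ws_len = 0;
  // First call may only report the workspace requirement.
  raft::matrix::sort_cols_per_row(d_keys, d_idx, n_rows, n_cols, need_ws, nullptr, ws_len, stream);
  if (need_ws) {
    // Allocate the requested scratch space, then sort for real.
    rmm::device_uvector<char> ws(ws_len, stream);
    raft::matrix::sort_cols_per_row(d_keys, d_idx, n_rows, n_cols, need_ws, ws.data(), ws_len, stream);
  }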
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MATH_H +#define __MATH_H + +#pragma once + +#include "detail/math.cuh" + +namespace raft { +namespace matrix { + +/** + * @defgroup MatrixMathOp math operation on the input matrix + * @{ + */ + +/** + * @brief Power of every element in the input matrix + * @param in: input matrix + * @param out: output matrix. The result is stored in the out matrix + * @param scalar: every element is multiplied with scalar. + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void power(math_t* in, math_t* out, math_t scalar, int len, cudaStream_t stream) +{ + detail::power(in, out, scalar, len, stream); +} + +/** + * @brief Power of every element in the input matrix + * @param inout: input matrix and also the result is stored + * @param scalar: every element is multiplied with scalar. + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void power(math_t* inout, math_t scalar, int len, cudaStream_t stream) +{ + detail::power(inout, scalar, len, stream); +} + +/** + * @brief Power of every element in the input matrix + * @param inout: input matrix and also the result is stored + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void power(math_t* inout, int len, cudaStream_t stream) +{ + detail::power(inout, len, stream); +} + +/** + * @brief Power of every element in the input matrix + * @param in: input matrix + * @param out: output matrix. The result is stored in the out matrix + * @param len: number elements of input matrix + * @param stream cuda stream + * @{ + */ +template +void power(math_t* in, math_t* out, int len, cudaStream_t stream) +{ + detail::power(in, out, len, stream); +} + +/** + * @brief Square root of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param in: input matrix and also the result is stored + * @param out: output matrix. 
The result is stored in the out matrix + * @param scalar: every element is multiplied with scalar + * @param len: number elements of input matrix + * @param stream cuda stream + * @param set_neg_zero whether to set negative numbers to zero + */ +template +void seqRoot(math_t* in, + math_t* out, + math_t scalar, + IdxType len, + cudaStream_t stream, + bool set_neg_zero = false) +{ + detail::seqRoot(in, out, scalar, len, stream, set_neg_zero); +} + +/** + * @brief Square root of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param inout: input matrix and also the result is stored + * @param scalar: every element is multiplied with scalar + * @param len: number elements of input matrix + * @param stream cuda stream + * @param set_neg_zero whether to set negative numbers to zero + */ +template +void seqRoot( + math_t* inout, math_t scalar, IdxType len, cudaStream_t stream, bool set_neg_zero = false) +{ + detail::seqRoot(inout, scalar, len, stream, set_neg_zero); +} + +/** + * @brief Square root of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param in: input matrix and also the result is stored + * @param out: output matrix. The result is stored in the out matrix + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void seqRoot(math_t* in, math_t* out, IdxType len, cudaStream_t stream) +{ + detail::seqRoot(in, out, len, stream); +} + +/** + * @brief Square root of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param inout: input matrix with in-place results + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void seqRoot(math_t* inout, IdxType len, cudaStream_t stream) +{ + detail::seqRoot(inout, len, stream); +} + +/** + * @brief sets the small values to zero based on a defined threshold + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param out: output matrix. 
The result is stored in the out matrix + * @param in: input matrix + * @param len: number elements of input matrix + * @param stream cuda stream + * @param thres threshold to set values to zero + */ +template +void setSmallValuesZero( + math_t* out, const math_t* in, IdxType len, cudaStream_t stream, math_t thres = 1e-15) +{ + detail::setSmallValuesZero(out, in, len, stream, thres); +} + +/** + * @brief sets the small values to zero based on a defined threshold + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param inout: input matrix and also the result is stored + * @param len: number elements of input matrix + * @param stream cuda stream + * @param thres: threshold + */ +template +void setSmallValuesZero(math_t* inout, IdxType len, cudaStream_t stream, math_t thres = 1e-15) +{ + detail::setSmallValuesZero(inout, len, stream, thres); +} + +/** + * @brief Reciprocal of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param in: input matrix and also the result is stored + * @param out: output matrix. The result is stored in the out matrix + * @param scalar: every element is multiplied with scalar + * @param len: number elements of input matrix + * @param stream cuda stream + * @param setzero round down to zero if the input is less the threshold + * @param thres the threshold used to forcibly set inputs to zero + * @{ + */ +template +void reciprocal(math_t* in, + math_t* out, + math_t scalar, + int len, + cudaStream_t stream, + bool setzero = false, + math_t thres = 1e-15) +{ + detail::reciprocal(in, out, scalar, len, stream, setzero, thres); +} + +/** + * @brief Reciprocal of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param inout: input matrix with in-place results + * @param scalar: every element is multiplied with scalar + * @param len: number elements of input matrix + * @param stream cuda stream + * @param setzero round down to zero if the input is less the threshold + * @param thres the threshold used to forcibly set inputs to zero + * @{ + */ +template +void reciprocal(math_t* inout, + math_t scalar, + IdxType len, + cudaStream_t stream, + bool setzero = false, + math_t thres = 1e-15) +{ + detail::reciprocal(inout, scalar, len, stream, setzero, thres); +} + +/** + * @brief Reciprocal of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param inout: input matrix and also the result is stored + * @param len: number elements of input matrix + * @param stream cuda stream + */ +template +void reciprocal(math_t* inout, IdxType len, cudaStream_t stream) +{ + detail::reciprocal(inout, len, stream); +} + +/** + * @brief Reciprocal of every element in the input matrix + * @tparam math_t data-type upon which the math operation will be performed + * @tparam IdxType Integer type used to for addressing + * @param in: input matrix and also the result is stored + * @param out: output matrix. 
The result is stored in the out matrix
+ * @param len: number of elements in the input matrix
+ * @param stream cuda stream
+ */
+template <typename math_t, typename IdxType = int>
+void reciprocal(math_t* in, math_t* out, IdxType len, cudaStream_t stream)
+{
+  detail::reciprocal(in, out, len, stream);
+}
+
+/**
+ * @brief set values to scalar in matrix
+ * @tparam math_t data-type upon which the math operation will be performed
+ * @param out output matrix. The result is stored in the out matrix
+ * @param in input matrix
+ * @param scalar scalar value
+ * @param len number of elements in the input matrix
+ * @param stream cuda stream
+ */
+template <typename math_t>
+void setValue(math_t* out, const math_t* in, math_t scalar, int len, cudaStream_t stream = 0)
+{
+  detail::setValue(out, in, scalar, len, stream);
+}
+
+/**
+ * @brief computes the ratio of every element to the sum of the input vector
+ * @tparam math_t data-type upon which the math operation will be performed
+ * @tparam IdxType Integer type used for addressing
+ * @param handle raft handle
+ * @param src: input matrix
+ * @param dest: output matrix. The result is stored in the dest matrix
+ * @param len: number of elements in the input matrix
+ * @param stream cuda stream
+ */
+template <typename math_t, typename IdxType = int>
+void ratio(
+  const raft::handle_t& handle, math_t* src, math_t* dest, IdxType len, cudaStream_t stream)
+{
+  detail::ratio(handle, src, dest, len, stream);
+}
+
+/** @} */
+
+/**
+ * @brief Argmax: find the row index with the maximum value for each column
+ * @param in: input matrix
+ * @param n_rows: number of rows of input matrix
+ * @param n_cols: number of columns of input matrix
+ * @param out: output vector of size n_cols
+ * @param stream: cuda stream
+ */
+template <typename math_t>
+void argmax(const math_t* in, int n_rows, int n_cols, math_t* out, cudaStream_t stream)
+{
+  detail::argmax(in, n_rows, n_cols, out, stream);
+}
+
+/**
+ * @brief sign flip for PCA. This is used to stabilize the sign of column
+ * major eigen vectors. Flips the sign if the column has negative |max|.
+ * @param inout: input matrix.
Result also stored in this parameter + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param stream cuda stream + */ +template +void signFlip(math_t* inout, int n_rows, int n_cols, cudaStream_t stream) +{ + detail::signFlip(inout, n_rows, n_cols, stream); +} + +/** + * @brief multiply each row or column of matrix with vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + */ +template +void matrixVectorBinaryMult(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream) +{ + detail::matrixVectorBinaryMult( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream); +} + +/** + * @brief multiply each row or column of matrix with vector, skipping zeros in vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + */ +template +void matrixVectorBinaryMultSkipZero(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream) +{ + detail::matrixVectorBinaryMultSkipZero( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream); +} + +/** + * @brief divide each row or column of matrix with vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + */ +template +void matrixVectorBinaryDiv(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream) +{ + detail::matrixVectorBinaryDiv( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream); +} + +/** + * @brief divide each row or column of matrix with vector, skipping zeros in vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + * @param return_zero result is zero if true and vector value is below threshold, original value if + * false + */ +template +void matrixVectorBinaryDivSkipZero(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream, + bool return_zero = false) +{ + detail::matrixVectorBinaryDivSkipZero( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream, return_zero); +} + +/** + * @brief add each row or column of matrix with vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether 
matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + */ +template +void matrixVectorBinaryAdd(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream) +{ + detail::matrixVectorBinaryAdd( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream); +} + +/** + * @brief subtract each row or column of matrix with vector + * @param data input matrix, results are in-place + * @param vec input vector + * @param n_row number of rows of input matrix + * @param n_col number of columns of input matrix + * @param rowMajor whether matrix is row major + * @param bcastAlongRows whether to broadcast vector along rows of matrix or columns + * @param stream cuda stream + */ +template +void matrixVectorBinarySub(Type* data, + const Type* vec, + IdxType n_row, + IdxType n_col, + bool rowMajor, + bool bcastAlongRows, + cudaStream_t stream) +{ + detail::matrixVectorBinarySub( + data, vec, n_row, n_col, rowMajor, bcastAlongRows, stream); +} + +}; // end namespace matrix +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/math.hpp b/cpp/include/raft/matrix/math.hpp index 619e20a702..ab02c8a85f 100644 --- a/cpp/include/raft/matrix/math.hpp +++ b/cpp/include/raft/matrix/math.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MATH_H +#define __MATH_H #pragma once @@ -461,3 +468,5 @@ void matrixVectorBinarySub(Type* data, }; // end namespace matrix }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh new file mode 100644 index 0000000000..1af7e37dec --- /dev/null +++ b/cpp/include/raft/matrix/matrix.cuh @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MATRIX_H +#define __MATRIX_H + +#pragma once + +#include "detail/linewise_op.cuh" +#include "detail/matrix.cuh" + +#include + +namespace raft { +namespace matrix { + +using namespace std; + +/** + * @brief Copy selected rows of the input matrix into contiguous space. + * + * On exit out[i + k*n_rows] = in[indices[i] + k*n_rows], + * where i = 0..n_rows_indices-1, and k = 0..n_cols-1. 
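A few call sketches for the raft::matrix math wrappers defined above (hypothetical device pointers; the scalar arguments are chosen arbitrarily):

  raft::matrix::power(d_in, d_out, 0.5f, len, stream);             // scaled element-wise power
  raft::matrix::seqRoot(d_inout, len, stream);                     // in-place square root
  raft::matrix::argmax(d_in, n_rows, n_cols, d_col_max, stream);   // per-column argmax row indices
  // Broadcast a vector over a column-major matrix, in place.
  raft::matrix::matrixVectorBinaryMult(
    d_data, d_vec, n_rows, n_cols, /*rowMajor=*/false, /*bcastAlongRows=*/true, stream);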
+ * + * @param in input matrix + * @param n_rows number of rows of output matrix + * @param n_cols number of columns of output matrix + * @param out output matrix + * @param indices of the rows to be copied + * @param n_rows_indices number of rows to copy + * @param stream cuda stream + * @param rowMajor whether the matrix has row major layout + */ +template +void copyRows(const m_t* in, + idx_t n_rows, + idx_t n_cols, + m_t* out, + const idx_array_t* indices, + idx_t n_rows_indices, + cudaStream_t stream, + bool rowMajor = false) +{ + detail::copyRows(in, n_rows, n_cols, out, indices, n_rows_indices, stream, rowMajor); +} + +/** + * @brief copy matrix operation for column major matrices. + * @param in: input matrix + * @param out: output matrix + * @param n_rows: number of rows of output matrix + * @param n_cols: number of columns of output matrix + * @param stream: cuda stream + */ +template +void copy(const m_t* in, m_t* out, idx_t n_rows, idx_t n_cols, cudaStream_t stream) +{ + raft::copy_async(out, in, n_rows * n_cols, stream); +} + +/** + * @brief copy matrix operation for column major matrices. First n_rows and + * n_cols of input matrix "in" is copied to "out" matrix. + * @param in: input matrix + * @param in_n_rows: number of rows of input matrix + * @param out: output matrix + * @param out_n_rows: number of rows of output matrix + * @param out_n_cols: number of columns of output matrix + * @param stream: cuda stream + */ +template +void truncZeroOrigin( + m_t* in, idx_t in_n_rows, m_t* out, idx_t out_n_rows, idx_t out_n_cols, cudaStream_t stream) +{ + detail::truncZeroOrigin(in, in_n_rows, out, out_n_rows, out_n_cols, stream); +} + +/** + * @brief Columns of a column major matrix is reversed (i.e. first column and + * last column are swapped) + * @param inout: input and output matrix + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param stream: cuda stream + */ +template +void colReverse(m_t* inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) +{ + detail::colReverse(inout, n_rows, n_cols, stream); +} + +/** + * @brief Rows of a column major matrix is reversed (i.e. 
first row and last + * row are swapped) + * @param inout: input and output matrix + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param stream: cuda stream + */ +template +void rowReverse(m_t* inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) +{ + detail::rowReverse(inout, n_rows, n_cols, stream); +} + +/** + * @brief Prints the data stored in GPU memory + * @param in: input matrix + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param h_separator: horizontal separator character + * @param v_separator: vertical separator character + * @param stream: cuda stream + */ +template +void print(const m_t* in, + idx_t n_rows, + idx_t n_cols, + char h_separator = ' ', + char v_separator = '\n', + cudaStream_t stream = rmm::cuda_stream_default) +{ + detail::print(in, n_rows, n_cols, h_separator, v_separator, stream); +} + +/** + * @brief Prints the data stored in CPU memory + * @param in: input matrix + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + */ +template +void printHost(const m_t* in, idx_t n_rows, idx_t n_cols) +{ + detail::printHost(in, n_rows, n_cols); +} + +/** + * @brief Slice a matrix (in-place) + * @param in: input matrix + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param out: output matrix + * @param x1, y1: coordinate of the top-left point of the wanted area (0-based) + * @param x2, y2: coordinate of the bottom-right point of the wanted area + * (1-based) + * example: Slice the 2nd and 3rd columns of a 4x3 matrix: slice_matrix(M_d, 4, + * 3, 0, 1, 4, 3); + * @param stream: cuda stream + */ +template +void sliceMatrix(m_t* in, + idx_t n_rows, + idx_t n_cols, + m_t* out, + idx_t x1, + idx_t y1, + idx_t x2, + idx_t y2, + cudaStream_t stream) +{ + detail::sliceMatrix(in, n_rows, n_cols, out, x1, y1, x2, y2, stream); +} + +/** + * @brief Copy the upper triangular part of a matrix to another + * @param src: input matrix with a size of n_rows x n_cols + * @param dst: output matrix with a size of kxk, k = min(n_rows, n_cols) + * @param n_rows: number of rows of input matrix + * @param n_cols: number of columns of input matrix + * @param stream: cuda stream + */ +template +void copyUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, cudaStream_t stream) +{ + detail::copyUpperTriangular(src, dst, n_rows, n_cols, stream); +} + +/** + * @brief Initialize a diagonal matrix with a vector + * @param vec: vector of length k = min(n_rows, n_cols) + * @param matrix: matrix of size n_rows x n_cols + * @param n_rows: number of rows of the matrix + * @param n_cols: number of columns of the matrix + * @param stream: cuda stream + */ +template +void initializeDiagonalMatrix( + m_t* vec, m_t* matrix, idx_t n_rows, idx_t n_cols, cudaStream_t stream) +{ + detail::initializeDiagonalMatrix(vec, matrix, n_rows, n_cols, stream); +} + +/** + * @brief Get a square matrix with elements on diagonal reversed (in-place) + * @param in: square input matrix with size len x len + * @param len: size of one side of the matrix + * @param stream: cuda stream + */ +template +void getDiagonalInverseMatrix(m_t* in, idx_t len, cudaStream_t stream) +{ + detail::getDiagonalInverseMatrix(in, len, stream); +} + +/** + * @brief Get the L2/F-norm of a matrix/vector + * @param handle + * @param in: input matrix/vector with totally size elements + * @param size: size of the 
matrix/vector + * @param stream: cuda stream + */ +template +m_t getL2Norm(const raft::handle_t& handle, m_t* in, idx_t size, cudaStream_t stream) +{ + return detail::getL2Norm(handle, in, size, stream); +} + +/** + * Run a function over matrix lines (rows or columns) with a variable number + * row-vectors or column-vectors. + * The term `line` here signifies that the lines can be either columns or rows, + * depending on the matrix layout. + * What matters is if the vectors are applied along lines (indices of vectors correspond to + * indices within lines), or across lines (indices of vectors correspond to line numbers). + * + * @param [out] out result of the operation; can be same as `in`; should be aligned the same + * as `in` to allow faster vectorized memory transfers. + * @param [in] in input matrix consisting of `nLines` lines, each `lineLen`-long. + * @param [in] lineLen length of matrix line in elements (`=nCols` in row-major or `=nRows` in + * col-major) + * @param [in] nLines number of matrix lines (`=nRows` in row-major or `=nCols` in col-major) + * @param [in] alongLines whether vectors are indices along or across lines. + * @param [in] op the operation applied on each line: + * for i in [0..lineLen) and j in [0..nLines): + * out[i, j] = op(in[i, j], vec1[i], vec2[i], ... veck[i]) if alongLines = true + * out[i, j] = op(in[i, j], vec1[j], vec2[j], ... veck[j]) if alongLines = false + * where matrix indexing is row-major ([i, j] = [i + lineLen * j]). + * @param [in] stream a cuda stream for the kernels + * @param [in] vecs zero or more vectors to be passed as arguments, + * size of each vector is `alongLines ? lineLen : nLines`. + */ +template +void linewiseOp(m_t* out, + const m_t* in, + const idx_t lineLen, + const idx_t nLines, + const bool alongLines, + Lambda op, + cudaStream_t stream, + Vecs... vecs) +{ + common::nvtx::range fun_scope("linewiseOp-%c-%zu (%zu, %zu)", + alongLines ? 'l' : 'x', + sizeof...(Vecs), + size_t(lineLen), + size_t(nLines)); + detail::MatrixLinewiseOp<16, 256>::run( + out, in, lineLen, nLines, alongLines, op, stream, vecs...); +} + +}; // end namespace matrix +}; // end namespace raft + +#endif diff --git a/cpp/include/raft/matrix/matrix.hpp b/cpp/include/raft/matrix/matrix.hpp index e3e2f88d14..cf5f5d1f25 100644 --- a/cpp/include/raft/matrix/matrix.hpp +++ b/cpp/include/raft/matrix/matrix.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MATRIX_H +#define __MATRIX_H #pragma once @@ -271,3 +278,5 @@ void linewiseOp(m_t* out, }; // end namespace matrix }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp index 151c49af7c..96aa622525 100644 --- a/cpp/include/raft/mr/buffer_base.hpp +++ b/cpp/include/raft/mr/buffer_base.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
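A sketch of the `linewiseOp` wrapper above, adding a per-line bias vector (hypothetical device pointers; for a column-major matrix, `lineLen` is the row count and `d_bias` needs `lineLen` entries when `alongLines` is true):

  raft::matrix::linewiseOp(
    d_out, d_in, n_rows, n_cols, /*alongLines=*/true,
    [] __device__(float x, float b) { return x + b; }, stream, d_bias);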
diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp index aee3cba046..954ce83d1f 100644 --- a/cpp/include/raft/mr/device/buffer.hpp +++ b/cpp/include/raft/mr/device/buffer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp index de9468add8..25aed3e725 100644 --- a/cpp/include/raft/mr/host/buffer.hpp +++ b/cpp/include/raft/mr/host/buffer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/random/detail/make_blobs.cuh b/cpp/include/raft/random/detail/make_blobs.cuh index b79178567b..10ded9c93e 100644 --- a/cpp/include/raft/random/detail/make_blobs.cuh +++ b/cpp/include/raft/random/detail/make_blobs.cuh @@ -19,8 +19,8 @@ #include "permute.cuh" #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh index eb8eaf565e..8bab85e485 100644 --- a/cpp/include/raft/random/detail/make_regression.cuh +++ b/cpp/include/raft/random/detail/make_regression.cuh @@ -24,15 +24,15 @@ #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include +#include #include namespace raft::random { diff --git a/cpp/include/raft/random/detail/multi_variable_gaussian.cuh b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh index bf79b3cb71..15789742fd 100644 --- a/cpp/include/raft/random/detail/multi_variable_gaussian.cuh +++ b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh @@ -22,8 +22,8 @@ #include #include #include -#include -#include +#include +#include #include // mvg.cuh takes in matrices that are colomn major (as in fortan) diff --git a/cpp/include/raft/random/make_blobs.cuh b/cpp/include/raft/random/make_blobs.cuh new file mode 100644 index 0000000000..2ad3a7960d --- /dev/null +++ b/cpp/include/raft/random/make_blobs.cuh @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MAKE_BLOBS_H +#define __MAKE_BLOBS_H + +#pragma once + +#include "detail/make_blobs.cuh" + +namespace raft::random { + +/** + * @brief GPU-equivalent of sklearn.datasets.make_blobs + * + * @tparam DataT output data type + * @tparam IdxT indexing arithmetic type + * + * @param[out] out generated data [on device] + * [dim = n_rows x n_cols] + * @param[out] labels labels for the generated data [on device] + * [len = n_rows] + * @param[in] n_rows number of rows in the generated data + * @param[in] n_cols number of columns in the generated data + * @param[in] n_clusters number of clusters (or classes) to generate + * @param[in] stream cuda stream to schedule the work on + * @param[in] row_major whether input `centers` and output `out` + * buffers are to be stored in row or column + * major layout + * @param[in] centers centers of each of the cluster, pass a nullptr + * if you need this also to be generated randomly + * [on device] [dim = n_clusters x n_cols] + * @param[in] cluster_std standard deviation of each cluster center, + * pass a nullptr if this is to be read from the + * `cluster_std_scalar`. [on device] + * [len = n_clusters] + * @param[in] cluster_std_scalar if 'cluster_std' is nullptr, then use this as + * the std-dev across all dimensions. + * @param[in] shuffle shuffle the generated dataset and labels + * @param[in] center_box_min min value of box from which to pick cluster + * centers. Useful only if 'centers' is nullptr + * @param[in] center_box_max max value of box from which to pick cluster + * centers. Useful only if 'centers' is nullptr + * @param[in] seed seed for the RNG + * @param[in] type RNG type + */ +template +void make_blobs(DataT* out, + IdxT* labels, + IdxT n_rows, + IdxT n_cols, + IdxT n_clusters, + cudaStream_t stream, + bool row_major = true, + const DataT* centers = nullptr, + const DataT* cluster_std = nullptr, + const DataT cluster_std_scalar = (DataT)1.0, + bool shuffle = true, + DataT center_box_min = (DataT)-10.0, + DataT center_box_max = (DataT)10.0, + uint64_t seed = 0ULL, + GeneratorType type = GenPhilox) +{ + detail::make_blobs_caller(out, + labels, + n_rows, + n_cols, + n_clusters, + stream, + row_major, + centers, + cluster_std, + cluster_std_scalar, + shuffle, + center_box_min, + center_box_max, + seed, + type); +} + +} // end namespace raft::random + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/make_blobs.hpp b/cpp/include/raft/random/make_blobs.hpp index afdabfe55b..19d4b8499b 100644 --- a/cpp/include/raft/random/make_blobs.hpp +++ b/cpp/include/raft/random/make_blobs.hpp @@ -14,6 +14,14 @@ * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MAKE_BLOBS_H +#define __MAKE_BLOBS_H + #pragma once #include "detail/make_blobs.cuh" @@ -88,4 +96,6 @@ void make_blobs(DataT* out, type); } -} // end namespace raft::random \ No newline at end of file +} // end namespace raft::random + +#endif diff --git a/cpp/include/raft/random/make_regression.cuh b/cpp/include/raft/random/make_regression.cuh new file mode 100644 index 0000000000..4fbb48fa35 --- /dev/null +++ b/cpp/include/raft/random/make_regression.cuh @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
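A minimal `make_blobs` call (hypothetical device buffers; `d_labels` is an int array here because the index type is deduced from the integer literals, and all remaining parameters keep their defaults):

  // 1000 samples, 16 features, 5 clusters (illustrative only).
  raft::random::make_blobs(d_data, d_labels, 1000, 16, 5, stream);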
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Adapted from scikit-learn + * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/_samples_generator.py + */ + +#ifndef __MAKE_REGRESSION_H +#define __MAKE_REGRESSION_H + +#pragma once + +#include + +#include "detail/make_regression.cuh" + +namespace raft::random { + +/** + * @brief GPU-equivalent of sklearn.datasets.make_regression as documented at: + * https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html + * + * @tparam DataT Scalar type + * @tparam IdxT Index type + * + * @param[in] handle RAFT handle + * @param[out] out Row-major (samples, features) matrix to store + * the problem data + * @param[out] values Row-major (samples, targets) matrix to store + * the values for the regression problem + * @param[in] n_rows Number of samples + * @param[in] n_cols Number of features + * @param[in] n_informative Number of informative features (non-zero + * coefficients) + * @param[in] stream CUDA stream + * @param[out] coef Row-major (features, targets) matrix to store + * the coefficients used to generate the values + * for the regression problem. If nullptr is + * given, nothing will be written + * @param[in] n_targets Number of targets (generated values per sample) + * @param[in] bias A scalar that will be added to the values + * @param[in] effective_rank The approximate rank of the data matrix (used + * to create correlations in the data). -1 is the + * code to use well-conditioned data + * @param[in] tail_strength The relative importance of the fat noisy tail + * of the singular values profile if + * effective_rank is not -1 + * @param[in] noise Standard deviation of the gaussian noise + * applied to the output + * @param[in] shuffle Shuffle the samples and the features + * @param[in] seed Seed for the random number generator + * @param[in] type Random generator type + */ +template +void make_regression(const raft::handle_t& handle, + DataT* out, + DataT* values, + IdxT n_rows, + IdxT n_cols, + IdxT n_informative, + cudaStream_t stream, + DataT* coef = nullptr, + IdxT n_targets = (IdxT)1, + DataT bias = (DataT)0.0, + IdxT effective_rank = (IdxT)-1, + DataT tail_strength = (DataT)0.5, + DataT noise = (DataT)0.0, + bool shuffle = true, + uint64_t seed = 0ULL, + GeneratorType type = GenPhilox) +{ + detail::make_regression_caller(handle, + out, + values, + n_rows, + n_cols, + n_informative, + stream, + coef, + n_targets, + bias, + effective_rank, + tail_strength, + noise, + shuffle, + seed, + type); +} + +} // namespace raft::random + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/make_regression.hpp b/cpp/include/raft/random/make_regression.hpp index d6fceff466..c050a447ed 100644 --- a/cpp/include/raft/random/make_regression.hpp +++ b/cpp/include/raft/random/make_regression.hpp @@ -18,6 +18,14 @@ * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/_samples_generator.py */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
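A minimal `make_regression` call (hypothetical device buffers; the coefficient output is skipped by leaving `coef` at its nullptr default):

  // 100 samples, 10 features, 5 of them informative (illustrative only).
  raft::random::make_regression(handle, d_data, d_values, 100, 10, 5, stream);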
+ */ + +#ifndef __MAKE_REGRESSION_H +#define __MAKE_REGRESSION_H + #pragma once #include @@ -97,4 +105,6 @@ void make_regression(const raft::handle_t& handle, type); } -} // namespace raft::random \ No newline at end of file +} // namespace raft::random + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/multi_variable_gaussian.cuh b/cpp/include/raft/random/multi_variable_gaussian.cuh new file mode 100644 index 0000000000..1d9d63f6c5 --- /dev/null +++ b/cpp/include/raft/random/multi_variable_gaussian.cuh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MVG_H +#define __MVG_H + +#pragma once + +#include "detail/multi_variable_gaussian.cuh" + +namespace raft::random { + +template +class multi_variable_gaussian : public detail::multi_variable_gaussian_impl { + public: + // using Decomposer = typename detail::multi_variable_gaussian_impl::Decomposer; + // using detail::multi_variable_gaussian_impl::Decomposer::chol_decomp; + // using detail::multi_variable_gaussian_impl::Decomposer::jacobi; + // using detail::multi_variable_gaussian_impl::Decomposer::qr; + + multi_variable_gaussian() = delete; + multi_variable_gaussian(const raft::handle_t& handle, + const int dim, + typename detail::multi_variable_gaussian_impl::Decomposer method) + : detail::multi_variable_gaussian_impl{handle, dim, method} + { + } + + std::size_t get_workspace_size() + { + return detail::multi_variable_gaussian_impl::get_workspace_size(); + } + + void set_workspace(T* workarea) + { + detail::multi_variable_gaussian_impl::set_workspace(workarea); + } + + void give_gaussian(const int nPoints, T* P, T* X, const T* x = 0) + { + detail::multi_variable_gaussian_impl::give_gaussian(nPoints, P, X, x); + } + + void deinit() { detail::multi_variable_gaussian_impl::deinit(); } + + ~multi_variable_gaussian() { deinit(); } +}; // end of multi_variable_gaussian + +}; // end of namespace raft::random + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/multi_variable_gaussian.hpp b/cpp/include/raft/random/multi_variable_gaussian.hpp index c2af52322a..fd1de4aadd 100644 --- a/cpp/include/raft/random/multi_variable_gaussian.hpp +++ b/cpp/include/raft/random/multi_variable_gaussian.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
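A hedged flow for the wrapper class above. This assumes the `Decomposer` enumerators are reachable through the public base class and that `get_workspace_size()` is a count of `T` elements; neither is guaranteed by this patch:

  raft::random::multi_variable_gaussian<float> mvg(
    handle, dim, raft::random::multi_variable_gaussian<float>::chol_decomp);
  rmm::device_uvector<float> ws(mvg.get_workspace_size(), stream);  // element count assumed
  mvg.set_workspace(ws.data());
  mvg.give_gaussian(n_points, d_cov, d_samples);  // d_cov: covariance, d_samples: output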
+ */
+
+#ifndef __MVG_H
+#define __MVG_H
 
 #pragma once
 
@@ -56,4 +63,6 @@ class multi_variable_gaussian : public detail::multi_variable_gaussian_impl {
   ~multi_variable_gaussian() { deinit(); }
 }; // end of multi_variable_gaussian
 
-}; // end of namespace raft::random
\ No newline at end of file
+}; // end of namespace raft::random
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/random/permute.cuh b/cpp/include/raft/random/permute.cuh
new file mode 100644
index 0000000000..1c01d589f4
--- /dev/null
+++ b/cpp/include/raft/random/permute.cuh
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERMUTE_H
+#define __PERMUTE_H
+
+#pragma once
+
+#include "detail/permute.cuh"
+
+namespace raft::random {
+
+/**
+ * @brief Generate permutations of the input array. A useful primitive for
+ * shuffling input datasets in ML algorithms; see the note at the end for its
+ * limitations.
+ * @tparam Type Data type of the array to be shuffled
+ * @tparam IntType Integer type used for the perms array
+ * @tparam IdxType Integer type used for addressing indices
+ * @tparam TPB threads per block
+ * @param perms the output permutation indices. Typically useful only when
+ * one wants to refer back. If you don't need this, pass a nullptr
+ * @param out the output shuffled array. Pass nullptr if you don't want this to
+ * be written, e.g. when you only want the perms array to be filled.
+ * @param in input array (in-place is not supported due to race conditions!)
+ * @param D number of columns of the input array
+ * @param N length of the input array (or number of rows)
+ * @param rowMajor whether the input/output matrices are row or col major
+ * @param stream cuda stream where to launch the work
+ *
+ * @note This is NOT a uniform permutation generator! In fact, it only generates
+ * a very small percentage of all permutations. If your application really requires
+ * a high-quality permutation generator, consider the Knuth (Fisher-Yates) shuffle
+ * instead.
+ */
+template <typename Type, typename IntType = int, typename IdxType = long, int TPB = 256>
+void permute(IntType* perms,
+             Type* out,
+             const Type* in,
+             IntType D,
+             IntType N,
+             bool rowMajor,
+             cudaStream_t stream)
+{
+  detail::permute<Type, IntType, IdxType, TPB>(perms, out, in, D, N, rowMajor, stream);
+}
+
+}; // end namespace raft::random
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/random/permute.hpp b/cpp/include/raft/random/permute.hpp
index 32ed3779e4..3507d66cc3 100644
--- a/cpp/include/raft/random/permute.hpp
+++ b/cpp/include/raft/random/permute.hpp
@@ -13,6 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
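A call sketch for `permute` (hypothetical device pointers for an N x D row-major input):

  // Shuffle rows, keeping the permutation indices for later lookup (illustrative only).
  raft::random::permute(d_perms, d_out, d_in, D, N, /*rowMajor=*/true, stream);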
+ */ + +#ifndef __PERMUTE_H +#define __PERMUTE_H #pragma once @@ -55,4 +62,6 @@ void permute(IntType* perms, detail::permute(perms, out, in, D, N, rowMajor, stream); } -}; // end namespace raft::random \ No newline at end of file +}; // end namespace raft::random + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh new file mode 100644 index 0000000000..3e75b2ae74 --- /dev/null +++ b/cpp/include/raft/random/rng.cuh @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RNG_H +#define __RNG_H + +#pragma once + +#include "detail/rng_impl.cuh" + +namespace raft { +namespace random { + +using detail::RngState; + +using detail::GeneratorType; +using detail::GenPC; +using detail::GenPhilox; + +using detail::PCGenerator; +using detail::PhiloxGenerator; + +using detail::BernoulliDistParams; +using detail::ExponentialDistParams; +using detail::GumbelDistParams; +using detail::InvariantDistParams; +using detail::LaplaceDistParams; +using detail::LogisticDistParams; +using detail::LogNormalDistParams; +using detail::NormalDistParams; +using detail::NormalIntDistParams; +using detail::NormalTableDistParams; +using detail::RayleighDistParams; +using detail::SamplingParams; +using detail::ScaledBernoulliDistParams; +using detail::UniformDistParams; +using detail::UniformIntDistParams; + +// Not strictly needed due to C++ ADL rules +using detail::custom_next; + +/** + * @brief Helper method to compute Box Muller transform + * + * @tparam Type data type + * + * @param[inout] val1 first value + * @param[inout] val2 second value + * @param[in] sigma1 standard deviation of output gaussian for first value + * @param[in] mu1 mean of output gaussian for first value + * @param[in] sigma2 standard deviation of output gaussian for second value + * @param[in] mu2 mean of output gaussian for second value + * @{ + */ +template +DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1, Type sigma2, Type mu2) +{ + detail::box_muller_transform(val1, val2, sigma1, mu1, sigma2, mu2); +} + +template +DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1) +{ + detail::box_muller_transform(val1, val2, sigma1, mu1); +} +/** @} */ + +class Rng : public detail::RngImpl { + public: + /** + * @brief ctor + * @param _s 64b seed used to initialize the RNG + * @param _t backend device RNG generator type + * @note Refer to the `Rng::seed` method for details about seeding the engine + */ + Rng(uint64_t _s, GeneratorType _t = GenPhilox) : detail::RngImpl(_s, _t) {} + + /** + * @brief Generates the 'a' and 'b' parameters for a modulo affine + * transformation equation: `(ax + b) % n` + * + * @tparam IdxT integer type + * + * @param[in] n the modulo range + * @param[out] a slope parameter + * @param[out] b intercept parameter + */ + template + void affine_transform_params(IdxT n, IdxT& a, IdxT& b) + { + detail::RngImpl::affine_transform_params(n, 
a, b); + } + + /** + * @brief Generate uniformly distributed numbers in the given range + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output array + * @param len the number of elements in the output + * @param start start of the range + * @param end end of the range + * @param stream stream where to launch the kernel + * @{ + */ + template + void uniform(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) + { + detail::RngImpl::uniform(ptr, len, start, end, stream); + } + + template + void uniformInt(OutType* ptr, LenType len, OutType start, OutType end, cudaStream_t stream) + { + detail::RngImpl::uniformInt(ptr, len, start, end, stream); + } + /** @} */ + + /** + * @brief Generate normal distributed numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output array + * @param len the number of elements in the output + * @param mu mean of the distribution + * @param sigma std-dev of the distribution + * @param stream stream where to launch the kernel + * @{ + */ + template + void normal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) + { + detail::RngImpl::normal(ptr, len, mu, sigma, stream); + } + + template + void normalInt(IntType* ptr, LenType len, IntType mu, IntType sigma, cudaStream_t stream) + { + detail::RngImpl::normalInt(ptr, len, mu, sigma, stream); + } + /** @} */ + + /** + * @brief Generate normal distributed table according to the given set of + * means and scalar standard deviations. + * + * Each row in this table conforms to a normally distributed n-dim vector + * whose mean is the input vector and standard deviation is the corresponding + * vector or scalar. Correlations among the dimensions itself is assumed to + * be absent. + * + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output table (dim = n_rows x n_cols) + * @param n_rows number of rows in the table + * @param n_cols number of columns in the table + * @param mu_vec mean vector (dim = n_cols x 1). + * @param sigma_vec std-dev vector of each component (dim = n_cols x 1). 
Pass + * a nullptr to use the same scalar 'sigma' across all components + * @param sigma scalar sigma to be used if 'sigma_vec' is nullptr + * @param stream stream where to launch the kernel + */ + template + void normalTable(OutType* ptr, + LenType n_rows, + LenType n_cols, + const OutType* mu_vec, + const OutType* sigma_vec, + OutType sigma, + cudaStream_t stream) + { + detail::RngImpl::normalTable(ptr, n_rows, n_cols, mu_vec, sigma_vec, sigma, stream); + } + + /** + * @brief Fill an array with the given value + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output array + * @param len the number of elements in the output + * @param val value to be filled + * @param stream stream where to launch the kernel + */ + template + void fill(OutType* ptr, LenType len, OutType val, cudaStream_t stream) + { + detail::RngImpl::fill(ptr, len, val, stream); + } + + /** + * @brief Generate bernoulli distributed boolean array + * + * @tparam Type data type in which to compute the probabilities + * @tparam OutType output data type + * @tparam LenType data type used to represent length of the arrays + * + * @param[out] ptr the output array + * @param[in] len the number of elements in the output + * @param[in] prob coin-toss probability for heads + * @param[in] stream stream where to launch the kernel + */ + template + void bernoulli(OutType* ptr, LenType len, Type prob, cudaStream_t stream) + { + detail::RngImpl::bernoulli(ptr, len, prob, stream); + } + + /** + * @brief Generate bernoulli distributed array and applies scale + * @tparam Type data type in which to compute the probabilities + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output array + * @param len the number of elements in the output + * @param prob coin-toss probability for heads + * @param scale scaling factor + * @param stream stream where to launch the kernel + */ + template + void scaled_bernoulli(OutType* ptr, LenType len, OutType prob, OutType scale, cudaStream_t stream) + { + detail::RngImpl::scaled_bernoulli(ptr, len, prob, scale, stream); + } + + /** + * @brief Generate Gumbel distributed random numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr output array + * @param len number of elements in the output array + * @param mu mean value + * @param beta scale value + * @param stream stream where to launch the kernel + * @note https://en.wikipedia.org/wiki/Gumbel_distribution + */ + template + void gumbel(OutType* ptr, LenType len, OutType mu, OutType beta, cudaStream_t stream) + { + detail::RngImpl::gumbel(ptr, len, mu, beta, stream); + } + + /** + * @brief Generate lognormal distributed numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr the output array + * @param len the number of elements in the output + * @param mu mean of the distribution + * @param sigma std-dev of the distribution + * @param stream stream where to launch the kernel + */ + template + void lognormal(OutType* ptr, LenType len, OutType mu, OutType sigma, cudaStream_t stream) + { + detail::RngImpl::lognormal(ptr, len, mu, sigma, stream); + } + + /** + * @brief Generate logistic distributed random numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr output array + * 
@param len number of elements in the output array + * @param mu mean value + * @param scale scale value + * @param stream stream where to launch the kernel + */ + template + void logistic(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) + { + detail::RngImpl::logistic(ptr, len, mu, scale, stream); + } + + /** + * @brief Generate exponentially distributed random numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr output array + * @param len number of elements in the output array + * @param lambda the lambda + * @param stream stream where to launch the kernel + */ + template + void exponential(OutType* ptr, LenType len, OutType lambda, cudaStream_t stream) + { + detail::RngImpl::exponential(ptr, len, lambda, stream); + } + + /** + * @brief Generate rayleigh distributed random numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr output array + * @param len number of elements in the output array + * @param sigma the sigma + * @param stream stream where to launch the kernel + */ + template + void rayleigh(OutType* ptr, LenType len, OutType sigma, cudaStream_t stream) + { + detail::RngImpl::rayleigh(ptr, len, sigma, stream); + } + + /** + * @brief Generate laplace distributed random numbers + * @tparam Type data type of output random number + * @tparam LenType data type used to represent length of the arrays + * @param ptr output array + * @param len number of elements in the output array + * @param mu the mean + * @param scale the scale + * @param stream stream where to launch the kernel + */ + template + void laplace(OutType* ptr, LenType len, OutType mu, OutType scale, cudaStream_t stream) + { + detail::RngImpl::laplace(ptr, len, mu, scale, stream); + } + + void advance(uint64_t max_streams, uint64_t max_calls_per_subsequence) + { + detail::RngImpl::advance(max_streams, max_calls_per_subsequence); + } + + /** + * @brief Sample the input array without replacement, optionally based on the + * input weight vector for each element in the array + * + * Implementation here is based on the `one-pass sampling` algo described here: + * https://www.ethz.ch/content/dam/ethz/special-interest/baug/ivt/ivt-dam/vpl/reports/1101-1200/ab1141.pdf + * + * @note In the sampled array the elements which are picked will always appear + * in the increasing order of their weights as computed using the exponential + * distribution. So, if you're particular about the order (for eg. array + * permutations), then this might not be the right choice! + * + * @tparam DataT data type + * @tparam WeightsT weights type + * @tparam IdxT index type + * @param handle + * @param out output sampled array (of length 'sampledLen') + * @param outIdx indices of the sampled array (of length 'sampledLen'). Pass + * a nullptr if this is not required. + * @param in input array to be sampled (of length 'len') + * @param wts weights array (of length 'len'). 
Pass a nullptr if uniform + * sampling is desired + * @param sampledLen output sampled array length + * @param len input array length + * @param stream cuda stream + */ + template + void sampleWithoutReplacement(const raft::handle_t& handle, + DataT* out, + IdxT* outIdx, + const DataT* in, + const WeightsT* wts, + IdxT sampledLen, + IdxT len, + cudaStream_t stream) + { + detail::RngImpl::sampleWithoutReplacement( + handle, out, outIdx, in, wts, sampledLen, len, stream); + } +}; + +}; // end namespace random +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/random/rng.hpp b/cpp/include/raft/random/rng.hpp index 2b1bdbccf7..2d1af6a97e 100644 --- a/cpp/include/raft/random/rng.hpp +++ b/cpp/include/raft/random/rng.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __RNG_H +#define __RNG_H #pragma once @@ -373,3 +380,5 @@ class Rng : public detail::RngImpl { }; // end namespace random }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/coo.cuh b/cpp/include/raft/sparse/convert/coo.cuh new file mode 100644 index 0000000000..b5568ef7d9 --- /dev/null +++ b/cpp/include/raft/sparse/convert/coo.cuh @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __COO_H +#define __COO_H + +#pragma once + +#include + +namespace raft { +namespace sparse { +namespace convert { + +/** + * @brief Convert a CSR row_ind array to a COO rows array + * @param row_ind: Input CSR row_ind array + * @param m: size of row_ind array + * @param coo_rows: Output COO row array + * @param nnz: size of output COO row array + * @param stream: cuda stream to use + */ +template +void csr_to_coo( + const value_idx* row_ind, value_idx m, value_idx* coo_rows, value_idx nnz, cudaStream_t stream) +{ + detail::csr_to_coo(row_ind, m, coo_rows, nnz, stream); +} + +}; // end NAMESPACE convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/coo.hpp b/cpp/include/raft/sparse/convert/coo.hpp index c647b99620..009a19a563 100644 --- a/cpp/include/raft/sparse/convert/coo.hpp +++ b/cpp/include/raft/sparse/convert/coo.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
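As a usage illustration of the csr_to_coo wrapper just added above, here is a short sketch; it is not part of the patch, and the buffer names and sizes are hypothetical.

#include <raft/handle.hpp>
#include <raft/sparse/convert/coo.cuh>
#include <rmm/device_uvector.hpp>

void csr_to_coo_sketch(const raft::handle_t& handle,
                       const int* row_ind,  // CSR row offsets, m entries
                       int m,
                       int nnz)
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<int> coo_rows(nnz, stream);
  // Expands the compressed row offsets into one row id per nonzero.
  raft::sparse::convert::csr_to_coo(row_ind, m, coo_rows.data(), nnz, stream);
}
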
+ */ + +#ifndef __COO_H +#define __COO_H #pragma once @@ -39,4 +46,6 @@ void csr_to_coo( }; // end NAMESPACE convert }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh new file mode 100644 index 0000000000..10bc22bcc1 --- /dev/null +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CSR_H +#define __CSR_H + +#pragma once + +#include <raft/sparse/convert/detail/csr.cuh> +#include <raft/sparse/coo.hpp> + +namespace raft { +namespace sparse { +namespace convert { + +/** + * @brief Convert COO arrays (srcRows, srcCols, srcVals) into CSR arrays + * @param handle: raft handle + * @param srcRows: COO rows array + * @param srcCols: COO columns array + * @param srcVals: COO values array + * @param nnz: size of the COO arrays + * @param m: number of rows in the matrix + * @param dst_offsets: output CSR row offsets + * @param dstCols: output CSR column indices + * @param dstVals: output CSR values + */ +template <typename value_t> +void coo_to_csr(const raft::handle_t& handle, + const int* srcRows, + const int* srcCols, + const value_t* srcVals, + int nnz, + int m, + int* dst_offsets, + int* dstCols, + value_t* dstVals) +{ + detail::coo_to_csr(handle, srcRows, srcCols, srcVals, nnz, m, dst_offsets, dstCols, dstVals); +} + +/** + * @brief Constructs an adjacency graph CSR row_ind_ptr array from + * a row_ind array and adjacency array. + * @tparam Index_ the numeric type of the index arrays + * @tparam TPB_X the number of threads to use per block for kernels + * @tparam Lambda function for fused operation in the adj_graph construction + * @param row_ind the input CSR row_ind array + * @param total_rows number of vertices in graph + * @param nnz number of non-zeros + * @param batchSize number of vertices in current batch + * @param adj an adjacency array (size batchSize x total_rows) + * @param row_ind_ptr output CSR row_ind_ptr for adjacency graph + * @param stream cuda stream to use + * @param fused_op: the fused operation + */ +template <typename Index_, int TPB_X = 32, typename Lambda = auto (Index_, Index_, Index_) -> void> +void csr_adj_graph_batched(const Index_* row_ind, + Index_ total_rows, + Index_ nnz, + Index_ batchSize, + const bool* adj, + Index_* row_ind_ptr, + cudaStream_t stream, + Lambda fused_op) +{ + detail::csr_adj_graph_batched( + row_ind, total_rows, nnz, batchSize, adj, row_ind_ptr, stream, fused_op); +} + +template <typename Index_, int TPB_X = 32, typename Lambda = auto (Index_, Index_, Index_) -> void> +void csr_adj_graph_batched(const Index_* row_ind, + Index_ total_rows, + Index_ nnz, + Index_ batchSize, + const bool* adj, + Index_* row_ind_ptr, + cudaStream_t stream) +{ + detail::csr_adj_graph_batched( + row_ind, total_rows, nnz, batchSize, adj, row_ind_ptr, stream); +} + +/** + * @brief Constructs an adjacency graph CSR row_ind_ptr array from + * a row_ind array and an adjacency array. 
+ * @tparam Index_ the numeric type of the index arrays + * @tparam TPB_X the number of threads to use per block for kernels + * @param row_ind the input CSR row_ind array + * @param total_rows number of total vertices in graph + * @param nnz number of non-zeros + * @param adj an adjacency array + * @param row_ind_ptr output CSR row_ind_ptr for adjacency graph + * @param stream cuda stream to use + * @param fused_op the fused operation + */ +template <typename Index_, int TPB_X = 32, typename Lambda = auto (Index_, Index_, Index_) -> void> +void csr_adj_graph(const Index_* row_ind, + Index_ total_rows, + Index_ nnz, + const bool* adj, + Index_* row_ind_ptr, + cudaStream_t stream, + Lambda fused_op) +{ + detail::csr_adj_graph( + row_ind, total_rows, nnz, adj, row_ind_ptr, stream, fused_op); +} + +/** + * @brief Generate the row indices array for a sorted COO matrix + * + * @param rows: COO rows array + * @param nnz: size of COO rows array + * @param row_ind: output row indices array + * @param m: number of rows in dense matrix + * @param stream: cuda stream to use + */ +template <typename T> +void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream) +{ + detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream); +} + +/** + * @brief Generate the row indices array for a sorted COO matrix + * + * @param coo: Input COO matrix + * @param row_ind: output row indices array + * @param stream: cuda stream to use + */ +template <typename T> +void sorted_coo_to_csr(COO<T>* coo, int* row_ind, cudaStream_t stream) +{ + detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream); +} + +}; // end NAMESPACE convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/csr.hpp b/cpp/include/raft/sparse/convert/csr.hpp index f0fe76bed3..6a9a99d014 100644 --- a/cpp/include/raft/sparse/convert/csr.hpp +++ b/cpp/include/raft/sparse/convert/csr.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __CSR_H +#define __CSR_H #pragma once @@ -135,4 +142,6 @@ void sorted_coo_to_csr(COO<T>* coo, int* row_ind, cudaStream_t stream) }; // end NAMESPACE convert }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/dense.cuh b/cpp/include/raft/sparse/convert/dense.cuh new file mode 100644 index 0000000000..a146113a86 --- /dev/null +++ b/cpp/include/raft/sparse/convert/dense.cuh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
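A hedged sketch of the inverse conversion with sorted_coo_to_csr above; the function and argument names come from the header, while the surrounding setup is hypothetical.

#include <raft/handle.hpp>
#include <raft/sparse/convert/csr.cuh>
#include <rmm/device_uvector.hpp>

void sorted_coo_to_csr_sketch(const raft::handle_t& handle,
                              const int* coo_rows,  // COO row ids, sorted by row
                              int nnz,
                              int m)  // number of rows in the dense view
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<int> row_ind(m, stream);
  raft::sparse::convert::sorted_coo_to_csr(coo_rows, nnz, row_ind.data(), m, stream);
}
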
+ */ +#ifndef __DENSE_H +#define __DENSE_H + +#pragma once + +#include + +namespace raft { +namespace sparse { +namespace convert { + +/** + * Convert CSR arrays to a dense matrix in either row- + * or column-major format. A custom kernel is used when + * row-major output is desired since cusparse does not + * output row-major. + * @tparam value_idx : data type of the CSR index arrays + * @tparam value_t : data type of the CSR value array + * @param[in] handle : cusparse handle for conversion + * @param[in] nrows : number of rows in CSR + * @param[in] ncols : number of columns in CSR + * @param[in] nnz : number of nonzeros in CSR + * @param[in] csr_indptr : CSR row index pointer array + * @param[in] csr_indices : CSR column indices array + * @param[in] csr_data : CSR data array + * @param[in] lda : Leading dimension (used for col-major only) + * @param[out] out : Dense output array of size nrows * ncols + * @param[in] stream : Cuda stream for ordering events + * @param[in] row_major : Is row-major output desired? + */ +template +void csr_to_dense(cusparseHandle_t handle, + value_idx nrows, + value_idx ncols, + value_idx nnz, + const value_idx* csr_indptr, + const value_idx* csr_indices, + const value_t* csr_data, + value_idx lda, + value_t* out, + cudaStream_t stream, + bool row_major = true) +{ + detail::csr_to_dense( + handle, nrows, ncols, nnz, csr_indptr, csr_indices, csr_data, lda, out, stream, row_major); +} + +}; // end NAMESPACE convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/dense.hpp b/cpp/include/raft/sparse/convert/dense.hpp index 2570d7ae65..1bdfa26732 100644 --- a/cpp/include/raft/sparse/convert/dense.hpp +++ b/cpp/include/raft/sparse/convert/dense.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __DENSE_H +#define __DENSE_H #pragma once @@ -60,4 +67,6 @@ void csr_to_dense(cusparseHandle_t handle, }; // end NAMESPACE convert }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh index c37087789c..2d13bfa34e 100644 --- a/cpp/include/raft/sparse/convert/detail/coo.cuh +++ b/cpp/include/raft/sparse/convert/detail/coo.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index 751335dfca..2516d00533 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
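A minimal sketch for csr_to_dense above. It assumes the caller obtains the raw cusparse handle from raft::handle_t; lda only matters for the column-major path.

#include <cstddef>
#include <raft/handle.hpp>
#include <raft/sparse/convert/dense.cuh>
#include <rmm/device_uvector.hpp>

void csr_to_dense_sketch(const raft::handle_t& handle,
                         int nrows, int ncols, int nnz,
                         const int* indptr, const int* indices, const float* data)
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<float> dense(static_cast<std::size_t>(nrows) * ncols, stream);
  raft::sparse::convert::csr_to_dense(handle.get_cusparse_handle(),
                                      nrows, ncols, nnz,
                                      indptr, indices, data,
                                      nrows,  // lda (used for col-major only)
                                      dense.data(), stream,
                                      true);  // row-major output
}
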
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,8 +35,8 @@ #include #include -#include -#include +#include +#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/convert/detail/dense.cuh b/cpp/include/raft/sparse/convert/detail/dense.cuh index b2756b81c9..4f97cee8b4 100644 --- a/cpp/include/raft/sparse/convert/detail/dense.cuh +++ b/cpp/include/raft/sparse/convert/detail/dense.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/detail/csr.cuh b/cpp/include/raft/sparse/detail/csr.cuh index a256ac402b..1fd2bb9366 100644 --- a/cpp/include/raft/sparse/detail/csr.cuh +++ b/cpp/include/raft/sparse/detail/csr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/detail/cusparse_macros.h b/cpp/include/raft/sparse/detail/cusparse_macros.h index 1f9f0e5175..10c7e8836c 100644 --- a/cpp/include/raft/sparse/detail/cusparse_macros.h +++ b/cpp/include/raft/sparse/detail/cusparse_macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h index aef3976294..b9c4a61850 100644 --- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh index 124fa2285d..7c1229b0d3 100644 --- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh index 020de9e014..9edd1305b3 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh index 6e717e9920..0848d24bde 100644 --- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,12 +22,12 @@ #include #include -#include +#include #include #include #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh index e6dd396f2d..468689848b 100644 --- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,12 +16,12 @@ #pragma once -#include +#include #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index 96d51f2e75..c6ff32caf3 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh index 06c034ad9f..a2fe090c96 100644 --- a/cpp/include/raft/sparse/distance/detail/utils.cuh +++ b/cpp/include/raft/sparse/distance/detail/utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh new file mode 100644 index 0000000000..ab189796ea --- /dev/null +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SPARSE_DIST_H +#define __SPARSE_DIST_H + +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include + +namespace raft { +namespace sparse { +namespace distance { + +static const std::unordered_set supportedDistance{ + raft::distance::DistanceType::L2Expanded, + raft::distance::DistanceType::L2Unexpanded, + raft::distance::DistanceType::L2SqrtExpanded, + raft::distance::DistanceType::L2SqrtUnexpanded, + raft::distance::DistanceType::InnerProduct, + raft::distance::DistanceType::L1, + raft::distance::DistanceType::Canberra, + raft::distance::DistanceType::Linf, + raft::distance::DistanceType::LpUnexpanded, + raft::distance::DistanceType::JaccardExpanded, + raft::distance::DistanceType::CosineExpanded, + raft::distance::DistanceType::HellingerExpanded, + raft::distance::DistanceType::DiceExpanded, + raft::distance::DistanceType::CorrelationExpanded, + raft::distance::DistanceType::RusselRaoExpanded, + raft::distance::DistanceType::HammingUnexpanded, + raft::distance::DistanceType::JensenShannon, + raft::distance::DistanceType::KLDivergence}; + +/** + * Compute pairwise distances between A and B, using the provided + * input configuration and distance function. + * + * @tparam value_idx index type + * @tparam value_t value type + * @param[out] out dense output array (size A.nrows * B.nrows) + * @param[in] input_config input argument configuration + * @param[in] metric distance metric to use + * @param[in] metric_arg metric argument (used for Minkowski distance) + */ +template +void pairwiseDistance(value_t* out, + distances_config_t input_config, + raft::distance::DistanceType metric, + float metric_arg) +{ + switch (metric) { + case raft::distance::DistanceType::L2Expanded: + detail::l2_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::L2SqrtExpanded: + detail::l2_sqrt_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::InnerProduct: + detail::ip_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::L2Unexpanded: + detail::l2_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::L2SqrtUnexpanded: + detail::l2_sqrt_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::L1: + detail::l1_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::LpUnexpanded: + detail::lp_unexpanded_distances_t(input_config, metric_arg).compute(out); + break; + case raft::distance::DistanceType::Linf: + detail::linf_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::Canberra: + detail::canberra_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::JaccardExpanded: + detail::jaccard_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::CosineExpanded: + detail::cosine_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::HellingerExpanded: + detail::hellinger_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::DiceExpanded: + detail::dice_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::CorrelationExpanded: + detail::correlation_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::RusselRaoExpanded: + 
detail::russelrao_expanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::HammingUnexpanded: + detail::hamming_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::JensenShannon: + detail::jensen_shannon_unexpanded_distances_t(input_config).compute(out); + break; + case raft::distance::DistanceType::KLDivergence: + detail::kl_divergence_unexpanded_distances_t(input_config).compute(out); + break; + + default: THROW("Unsupported distance: %d", metric); + } +} + +}; // namespace distance +}; // namespace sparse +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp index dc9837ab43..cba419e53a 100644 --- a/cpp/include/raft/sparse/distance/distance.hpp +++ b/cpp/include/raft/sparse/distance/distance.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_DIST_H +#define __SPARSE_DIST_H #pragma once @@ -130,3 +137,5 @@ void pairwiseDistance(value_t* out, }; // namespace distance }; // namespace sparse }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh index 105f1cc9f6..c89f5a370a 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh index fe58246545..9d4126f8fd 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,14 +20,14 @@ #include #include -#include +#include #include #include -#include +#include #include #include -#include +#include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh index 10e9d04c0d..545a371850 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
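To make the dispatch above concrete, here is a sketch of computing expanded L2 distances between two CSR matrices. It assumes distances_config_t carries the usual a_*/b_* CSR descriptor fields (as in the existing hpp header this file mirrors) and that the caller has already populated them.

#include <raft/handle.hpp>
#include <raft/sparse/distance/distance.cuh>

void sparse_l2_sketch(raft::sparse::distance::distances_config_t<int, float>& config,
                      float* out)  // dense output of size a_nrows * b_nrows
{
  raft::sparse::distance::pairwiseDistance(
    out, config, raft::distance::DistanceType::L2Expanded, 0.0f);
}
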
@@ -20,8 +20,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/single_linkage.hpp b/cpp/include/raft/sparse/hierarchy/detail/single_linkage.cuh similarity index 99% rename from cpp/include/raft/sparse/hierarchy/detail/single_linkage.hpp rename to cpp/include/raft/sparse/hierarchy/detail/single_linkage.cuh index 702198e422..4e94b6f65d 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/single_linkage.hpp +++ b/cpp/include/raft/sparse/hierarchy/detail/single_linkage.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/hierarchy/single_linkage.cuh b/cpp/include/raft/sparse/hierarchy/single_linkage.cuh new file mode 100644 index 0000000000..86940005b4 --- /dev/null +++ b/cpp/include/raft/sparse/hierarchy/single_linkage.cuh @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SINGLE_LINKAGE_H +#define __SINGLE_LINKAGE_H + +#pragma once + +#include <raft/handle.hpp> +#include <raft/sparse/hierarchy/detail/single_linkage.cuh> + +namespace raft { +namespace hierarchy { + +/** + * Single-linkage clustering. To scale beyond the n^2 memory consumption of + * implementations that use the fully-connected graph of pairwise distances, + * a KNN graph can be constructed instead and then connected further when k + * is not large enough to produce a single connected component. + * + * @tparam value_idx index type + * @tparam value_t value type + * @tparam dist_type method to use for constructing connectivities graph + * @param[in] handle raft handle + * @param[in] X dense input matrix in row-major layout + * @param[in] m number of rows in X + * @param[in] n number of columns in X + * @param[in] metric distance metric to use when constructing connectivities graph + * @param[out] out struct containing output dendrogram and cluster assignments + * @param[in] c a constant used when constructing connectivities from knn graph. Allows the indirect + control + * of k. The algorithm will set `k = log(n) + c` + * @param[in] n_clusters number of clusters to assign data samples + */ +template <typename value_idx, typename value_t, LinkageDistance dist_type> +void single_linkage(const raft::handle_t& handle, + const value_t* X, + size_t m, + size_t n, + raft::distance::DistanceType metric, + linkage_output<value_idx, value_t>* out, + int c, + size_t n_clusters) +{ + detail::single_linkage<value_idx, value_t, dist_type>( + handle, X, m, n, metric, out, c, n_clusters); +} +}; // namespace hierarchy +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/hierarchy/single_linkage.hpp b/cpp/include/raft/sparse/hierarchy/single_linkage.hpp index 104c1235d4..e7a37b7bf5 100644 --- a/cpp/include/raft/sparse/hierarchy/single_linkage.hpp +++ b/cpp/include/raft/sparse/hierarchy/single_linkage.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
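A hedged sketch of calling single_linkage above; the explicit KNN_GRAPH connectivity type and the caller-allocated linkage_output buffers are assumptions based on the hpp header this file mirrors.

#include <raft/handle.hpp>
#include <raft/sparse/hierarchy/single_linkage.cuh>

void single_linkage_sketch(const raft::handle_t& handle,
                           const float* X,  // row-major, m x n
                           size_t m, size_t n,
                           raft::hierarchy::linkage_output<int, float>* out)
{
  raft::hierarchy::single_linkage<int, float, raft::hierarchy::LinkageDistance::KNN_GRAPH>(
    handle, X, m, n, raft::distance::DistanceType::L2SqrtExpanded, out,
    15,  // c: the algorithm sets k = log(n) + c
    5);  // n_clusters
}
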
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,11 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SINGLE_LINKAGE_H +#define __SINGLE_LINKAGE_H #pragma once #include <raft/handle.hpp> -#include <raft/sparse/hierarchy/detail/single_linkage.hpp> +#include <raft/sparse/hierarchy/detail/single_linkage.cuh> namespace raft { namespace hierarchy { @@ -59,3 +66,5 @@ void single_linkage(const raft::handle_t& handle, } }; // namespace hierarchy }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh new file mode 100644 index 0000000000..def305afb2 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/add.cuh @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SPARSE_ADD_H +#define __SPARSE_ADD_H + +#pragma once + +#include <raft/sparse/linalg/detail/add.cuh> + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @brief Calculate the CSR row_ind array that would result + * from summing together two CSR matrices + * @param a_ind: left hand row_ind array + * @param a_indptr: left hand index_ptr array + * @param a_val: left hand data array + * @param nnz1: size of left hand index_ptr and val arrays + * @param b_ind: right hand row_ind array + * @param b_indptr: right hand index_ptr array + * @param b_val: right hand data array + * @param nnz2: size of right hand index_ptr and val arrays + * @param m: size of output array (number of rows in final matrix) + * @param out_ind: output row_ind array + * @param stream: cuda stream to use + * @return the number of non-zeros in the resulting (summed) matrix + */ +template <typename T> +size_t csr_add_calc_inds(const int* a_ind, + const int* a_indptr, + const T* a_val, + int nnz1, + const int* b_ind, + const int* b_indptr, + const T* b_val, + int nnz2, + int m, + int* out_ind, + cudaStream_t stream) +{ + return detail::csr_add_calc_inds( + a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m, out_ind, stream); +} + +/** + * @brief Calculate the final CSR arrays (column indices and values) that result + * from summing together two CSR matrices, using the output row_ind array + * computed by csr_add_calc_inds + * @param a_ind: left hand row_ind array + * @param a_indptr: left hand index_ptr array + * @param a_val: left hand data array + * @param nnz1: size of left hand index_ptr and val arrays + * @param b_ind: right hand row_ind array + * @param b_indptr: right hand index_ptr array + * @param b_val: right hand data array + * @param nnz2: size of right hand index_ptr and val arrays + * @param m: size of output array (number of rows in final matrix) + * @param c_ind: output row_ind array + * @param c_indptr: output ind_ptr array + * @param c_val: output data array + * @param stream: cuda stream to use + */ +template <typename T> +void csr_add_finalize(const int* a_ind, + const int* a_indptr, + const T* a_val, + int nnz1, + const int* b_ind, + const int* b_indptr, + const T* b_val, + int nnz2, + 
int m, + int* c_ind, + int* c_indptr, + T* c_val, + cudaStream_t stream) +{ + detail::csr_add_finalize( + a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m, c_ind, c_indptr, c_val, stream); +} + +}; // end NAMESPACE linalg +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/add.hpp b/cpp/include/raft/sparse/linalg/add.hpp index 30c39b1ffc..33259cb39f 100644 --- a/cpp/include/raft/sparse/linalg/add.hpp +++ b/cpp/include/raft/sparse/linalg/add.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_ADD_H +#define __SPARSE_ADD_H #pragma once @@ -93,3 +100,5 @@ void csr_add_finalize(const int* a_ind, }; // end NAMESPACE linalg }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh new file mode 100644 index 0000000000..57c9b986b4 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
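The two functions above form a two-phase API: csr_add_calc_inds sizes the output and fills its row offsets, returning the output nnz that the caller uses to allocate the buffers passed to csr_add_finalize. A hypothetical sketch (note the naming convention in this API: *_ind are row offsets and *_indptr are per-nonzero column indices):

#include <cstddef>
#include <raft/handle.hpp>
#include <raft/sparse/linalg/add.cuh>
#include <rmm/device_uvector.hpp>

void csr_add_sketch(const raft::handle_t& handle,
                    const int* a_ind, const int* a_indptr, const float* a_val, int nnz1,
                    const int* b_ind, const int* b_indptr, const float* b_val, int nnz2,
                    int m)
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<int> c_ind(m, stream);  // output row offsets
  std::size_t c_nnz = raft::sparse::linalg::csr_add_calc_inds(
    a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m, c_ind.data(), stream);

  rmm::device_uvector<int> c_indptr(c_nnz, stream);  // output column indices
  rmm::device_uvector<float> c_val(c_nnz, stream);   // output values
  raft::sparse::linalg::csr_add_finalize(
    a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m,
    c_ind.data(), c_indptr.data(), c_val.data(), stream);
}
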
+ */ +#ifndef __SPARSE_DEGREE_H +#define __SPARSE_DEGREE_H + +#pragma once + +#include <raft/sparse/coo.hpp> +#include <raft/sparse/linalg/detail/degree.cuh> + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @brief Count the number of values for each row + * @tparam T: the type name of the underlying index arrays + * @param rows: rows array of the COO matrix + * @param nnz: size of the rows array + * @param results: output result array + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream) +{ + detail::coo_degree<64, T>(rows, nnz, results, stream); +} + +/** + * @brief Count the number of values for each row + * @tparam T: type name of underlying values array + * @param in: input COO object for counting rows + * @param results: output array with row counts (size=in->n_rows) + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree(COO<T>* in, int* results, cudaStream_t stream) +{ + coo_degree(in->rows(), in->nnz, results, stream); +} + +/** + * @brief Count the number of values for each row that don't match a particular scalar + * @tparam T: the type name of the underlying value arrays + * @param rows: Input COO row array + * @param vals: Input COO val arrays + * @param nnz: size of input COO arrays + * @param scalar: scalar to match for counting rows + * @param results: output row counts + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree_scalar( + const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0) +{ + detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); +} + +/** + * @brief Count the number of values for each row that don't match a particular scalar + * @tparam T: the type name of the underlying value arrays + * @param in: Input COO array + * @param scalar: scalar to match for counting rows + * @param results: output row counts + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree_scalar(COO<T>* in, T scalar, int* results, cudaStream_t stream) +{ + coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream); +} + +/** + * @brief Count the number of nonzeros for each row + * @tparam T: the type name of the underlying value arrays + * @param rows: Input COO row array + * @param vals: Input COO val arrays + * @param nnz: size of input COO arrays + * @param results: output row counts + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree_nz(const int* rows, const T* vals, int nnz, int* results, cudaStream_t stream) +{ + detail::coo_degree_nz<64>(rows, vals, nnz, results, stream); +} + +/** + * @brief Count the number of nonzero values for each row + * @tparam T: the type name of the underlying value arrays + * @param in: Input COO array + * @param results: output row counts + * @param stream: cuda stream to use + */ +template <typename T> +void coo_degree_nz(COO<T>* in, int* results, cudaStream_t stream) +{ + coo_degree_nz(in->rows(), in->vals(), in->nnz, results, stream); +} + +}; // end NAMESPACE linalg +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/degree.hpp b/cpp/include/raft/sparse/linalg/degree.hpp index 04643b219d..0c6af596ce 100644 --- a/cpp/include/raft/sparse/linalg/degree.hpp +++ 
b/cpp/include/raft/sparse/linalg/degree.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_DEGREE_H +#define __SPARSE_DEGREE_H #pragma once @@ -117,3 +124,5 @@ void coo_degree_nz(COO* in, int* results, cudaStream_t stream) }; // end NAMESPACE linalg }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/detail/add.cuh b/cpp/include/raft/sparse/linalg/detail/add.cuh index b288d0a603..5c3d07fc02 100644 --- a/cpp/include/raft/sparse/linalg/detail/add.cuh +++ b/cpp/include/raft/sparse/linalg/detail/add.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index b7420a55e7..ba0ecd5dcc 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 7e5bd5b9e4..c295932719 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -17,12 +17,12 @@ #include #include -#include -#include -#include +#include +#include +#include #include -#include +#include #include namespace raft { diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 4384f2ba55..9143aac84f 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include @@ -35,10 +35,10 @@ #include #include -#include +#include #include #include -#include +#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/linalg/detail/transpose.h b/cpp/include/raft/sparse/linalg/detail/transpose.h index 398877eaab..4820b489d1 100644 --- a/cpp/include/raft/sparse/linalg/detail/transpose.h +++ b/cpp/include/raft/sparse/linalg/detail/transpose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
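Before the next file, a sketch of the scalar-degree primitive from degree.cuh above; zero-initializing the counts first is an assumption based on the counts being accumulated into the output array.

#include <cuda_runtime.h>
#include <raft/handle.hpp>
#include <raft/sparse/linalg/degree.cuh>
#include <rmm/device_uvector.hpp>

void degree_sketch(const raft::handle_t& handle,
                   const int* rows, const float* vals, int nnz, int n_rows)
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<int> counts(n_rows, stream);
  cudaMemsetAsync(counts.data(), 0, n_rows * sizeof(int), stream);
  // Per-row counts of entries whose value is not 0.0f.
  raft::sparse::linalg::coo_degree_scalar(rows, vals, nnz, 0.0f, counts.data(), stream);
}
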
diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh new file mode 100644 index 0000000000..e13fd22843 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SPARSE_NORM_H +#define __SPARSE_NORM_H + +#pragma once + +#include <raft/sparse/linalg/detail/norm.cuh> + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @brief Perform L1 normalization on the rows of a given CSR-formatted sparse matrix + * + * @param ia: row_ind array + * @param vals: data array + * @param nnz: size of data array + * @param m: size of row_ind array + * @param result: l1 normalized data array + * @param stream: cuda stream to use + */ +template <typename T> +void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) + const T* vals, + int nnz, // array of values and number of non-zeros + int m, // num rows in csr + T* result, // output array + cudaStream_t stream) +{ + detail::csr_row_normalize_l1(ia, vals, nnz, m, result, stream); +} + +/** + * @brief Perform L_inf normalization on a given CSR-formatted sparse matrix + * + * @param ia: row_ind array + * @param vals: data array + * @param nnz: size of data array + * @param m: size of row_ind array + * @param result: max normalized data array + * @param stream: cuda stream to use + */ +template <typename T> +void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) + const T* vals, + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr + T* result, + cudaStream_t stream) +{ + detail::csr_row_normalize_max(ia, vals, nnz, m, result, stream); +} + +}; // end NAMESPACE linalg +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/norm.hpp b/cpp/include/raft/sparse/linalg/norm.hpp index 683daedf4f..196951bac7 100644 --- a/cpp/include/raft/sparse/linalg/norm.hpp +++ b/cpp/include/raft/sparse/linalg/norm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. 
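A short sketch of L1 row normalization with the wrapper above; buffer names and sizes are illustrative.

#include <raft/handle.hpp>
#include <raft/sparse/linalg/norm.cuh>
#include <rmm/device_uvector.hpp>

void normalize_sketch(const raft::handle_t& handle,
                      const int* row_ind, const float* vals, int nnz, int m)
{
  cudaStream_t stream = handle.get_stream();
  rmm::device_uvector<float> result(nnz, stream);
  // Each row's absolute values end up summing to one.
  raft::sparse::linalg::csr_row_normalize_l1(row_ind, vals, nnz, m, result.data(), stream);
}
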
+ */ + +#ifndef __SPARSE_NORM_H +#define __SPARSE_NORM_H #pragma once @@ -66,4 +73,6 @@ void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh new file mode 100644 index 0000000000..fe95d1414c --- /dev/null +++ b/cpp/include/raft/sparse/linalg/spectral.cuh @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SPARSE_SPECTRAL_H +#define __SPARSE_SPECTRAL_H + +#include +#include + +namespace raft { +namespace sparse { +namespace spectral { + +template +void fit_embedding(const raft::handle_t& handle, + int* rows, + int* cols, + T* vals, + int nnz, + int n, + int n_components, + T* out, + unsigned long long seed = 1234567) +{ + detail::fit_embedding(handle, rows, cols, vals, nnz, n, n_components, out, seed); +} +}; // namespace spectral +}; // namespace sparse +}; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/spectral.hpp b/cpp/include/raft/sparse/linalg/spectral.hpp index 619987062f..9daa6e07b0 100644 --- a/cpp/include/raft/sparse/linalg/spectral.hpp +++ b/cpp/include/raft/sparse/linalg/spectral.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_SPECTRAL_H +#define __SPARSE_SPECTRAL_H #include #include @@ -37,3 +44,5 @@ void fit_embedding(const raft::handle_t& handle, }; // namespace spectral }; // namespace sparse }; // namespace raft + +#endif diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh new file mode 100644 index 0000000000..d41540c0b3 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
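A hedged sketch of the spectral fit_embedding wrapper above, embedding an n x n COO affinity matrix into two components via the default-seed overload; allocation details are assumptions.

#include <cstddef>
#include <raft/handle.hpp>
#include <raft/sparse/linalg/spectral.cuh>
#include <rmm/device_uvector.hpp>

void spectral_sketch(const raft::handle_t& handle,
                     int* rows, int* cols, float* vals, int nnz, int n)
{
  int n_components = 2;
  rmm::device_uvector<float> embedding(
    static_cast<std::size_t>(n) * n_components, handle.get_stream());
  raft::sparse::spectral::fit_embedding(
    handle, rows, cols, vals, nnz, n, n_components, embedding.data());
}
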
+ */ +#ifndef __SYMMETRIZE_H +#define __SYMMETRIZE_H + +#pragma once + +#include <raft/sparse/coo.hpp> +#include <raft/sparse/linalg/detail/symmetrize.cuh> + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * @brief Takes a COO matrix which may not be symmetric and symmetrizes + * it, running a custom reduction function against each value + * and its transposed value. + * + * @param in: Input COO matrix + * @param out: Output symmetrized COO matrix + * @param reduction_op: a custom reduction function + * @param stream: cuda stream to use + */ +template <int TPB_X = 128, typename T, typename Lambda> +void coo_symmetrize(COO<T>* in, + COO<T>* out, + Lambda reduction_op, // two-argument reducer + cudaStream_t stream) +{ + detail::coo_symmetrize(in, out, reduction_op, stream); +} + +/** + * @brief Find how much space is needed in each row. + * We look through all datapoints and increment the count for each row. + * + * TODO: This isn't generalized. Remove in place of `symmetrize()` + * @param data: Input knn distances(n, k) + * @param indices: Input knn indices(n, k) + * @param n: Number of rows + * @param k: Number of n_neighbors + * @param row_sizes: Input empty row sum 1 array(n) + * @param row_sizes2: Input empty row sum 2 array(n) for faster reduction + */ +template <typename value_idx, typename value_t> +__global__ static void symmetric_find_size(const value_t* __restrict__ data, + const value_idx* __restrict__ indices, + const value_idx n, + const int k, + value_idx* __restrict__ row_sizes, + value_idx* __restrict__ row_sizes2) +{ + detail::symmetric_find_size(data, indices, n, k, row_sizes, row_sizes2); +} + +/** + * @brief Reduce sum(row_sizes) + k + * Reduction for symmetric_find_size kernel. Allows algo to be faster. + * + * TODO: This isn't generalized. Remove in place of `symmetrize()` + * @param n: Number of rows + * @param k: Number of n_neighbors + * @param row_sizes: Input row sum 1 array(n) + * @param row_sizes2: Input row sum 2 array(n) for faster reduction + */ +template <typename value_idx> +__global__ static void reduce_find_size(const value_idx n, + const int k, + value_idx* __restrict__ row_sizes, + const value_idx* __restrict__ row_sizes2) +{ + detail::reduce_find_size(n, k, row_sizes, row_sizes2); +} + +/** + * @brief Perform data + data.T operation. + * Can only be run once row_sizes from the CSR matrix of data + data.T has been + * determined. + * + * TODO: This isn't generalized. Remove in place of `symmetrize()` + * + * @param edges: Input row sum array(n) after reduction + * @param data: Input knn distances(n, k) + * @param indices: Input knn indices(n, k) + * @param VAL: Output values for data + data.T + * @param COL: Output column indices for data + data.T + * @param ROW: Output row indices for data + data.T + * @param n: Number of rows + * @param k: Number of n_neighbors + */ +template <typename value_idx, typename value_t> +__global__ static void symmetric_sum(value_idx* __restrict__ edges, + const value_t* __restrict__ data, + const value_idx* __restrict__ indices, + value_t* __restrict__ VAL, + value_idx* __restrict__ COL, + value_idx* __restrict__ ROW, + const value_idx n, + const int k) +{ + detail::symmetric_sum(edges, data, indices, VAL, COL, ROW, n, k); +} + +/** + * @brief Perform data + data.T on raw KNN data. + * The following steps are invoked: + * (1) Find how much space needed in each row + * (2) Compute final space needed (n*k + sum(row_sizes)) == 2*n*k + * (3) Allocate new space + * (4) Prepare edges for each new row + * (5) Perform final data + data.T operation + * (6) Return summed up VAL, COL, ROW + * + * TODO: This isn't generalized. 
Remove in place of `symmetrize()` + * + * @param knn_indices: Input knn indices(n, k) + * @param knn_dists: Input knn distances(n, k) + * @param n: Number of rows + * @param k: Number of n_neighbors + * @param out: Output COO Matrix class + * @param stream: Input cuda stream + */ +template <typename value_idx, typename value_t> +void from_knn_symmetrize_matrix(const value_idx* __restrict__ knn_indices, + const value_t* __restrict__ knn_dists, + const value_idx n, + const int k, + COO<value_t, value_idx>* out, + cudaStream_t stream) +{ + detail::from_knn_symmetrize_matrix(knn_indices, knn_dists, n, k, out, stream); +} + +/** + * Symmetrizes a COO matrix + */ +template <typename value_idx, typename value_t> +void symmetrize(const raft::handle_t& handle, + const value_idx* rows, + const value_idx* cols, + const value_t* vals, + size_t m, + size_t n, + size_t nnz, + raft::sparse::COO<value_t, value_idx>& out) +{ + detail::symmetrize(handle, rows, cols, vals, m, n, nnz, out); +} + +}; // end NAMESPACE linalg +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/symmetrize.hpp b/cpp/include/raft/sparse/linalg/symmetrize.hpp index 64d27f5b6f..4d8520dabf 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.hpp +++ b/cpp/include/raft/sparse/linalg/symmetrize.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SYMMETRIZE_H +#define __SYMMETRIZE_H #pragma once @@ -162,3 +169,5 @@ void symmetrize(const raft::handle_t& handle, }; // end NAMESPACE linalg }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif diff --git a/cpp/include/raft/sparse/linalg/transpose.cuh b/cpp/include/raft/sparse/linalg/transpose.cuh new file mode 100644 index 0000000000..8f0105f512 --- /dev/null +++ b/cpp/include/raft/sparse/linalg/transpose.cuh @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __TRANSPOSE_H +#define __TRANSPOSE_H + +#pragma once + +#include <raft/handle.hpp> +#include <raft/sparse/linalg/detail/transpose.h> + +namespace raft { +namespace sparse { +namespace linalg { + +/** + * Transpose a set of CSR arrays into a set of CSC arrays. 
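A sketch of the general symmetrize entry point defined above; constructing the output COO with just a stream is an assumption about raft::sparse::COO's constructors.

#include <cstddef>
#include <raft/handle.hpp>
#include <raft/sparse/coo.hpp>
#include <raft/sparse/linalg/symmetrize.cuh>

void symmetrize_sketch(const raft::handle_t& handle,
                       const int* rows, const int* cols, const float* vals,
                       std::size_t m, std::size_t n, std::size_t nnz)
{
  raft::sparse::COO<float, int> out(handle.get_stream());
  raft::sparse::linalg::symmetrize(handle, rows, cols, vals, m, n, nnz, out);
  // out.rows(), out.cols(), out.vals() now hold the symmetrized matrix.
}
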
+ * @tparam value_idx : data type of the CSR index arrays + * @tparam value_t : data type of the CSR data array + * @param[in] handle : used for invoking cusparse + * @param[in] csr_indptr : CSR row index array + * @param[in] csr_indices : CSR column indices array + * @param[in] csr_data : CSR data array + * @param[out] csc_indptr : CSC row index array + * @param[out] csc_indices : CSC column indices array + * @param[out] csc_data : CSC data array + * @param[in] csr_nrows : Number of rows in CSR + * @param[in] csr_ncols : Number of columns in CSR + * @param[in] nnz : Number of nonzeros of CSR + * @param[in] stream : Cuda stream for ordering events + */ +template +void csr_transpose(const raft::handle_t& handle, + const value_idx* csr_indptr, + const value_idx* csr_indices, + const value_t* csr_data, + value_idx* csc_indptr, + value_idx* csc_indices, + value_t* csc_data, + value_idx csr_nrows, + value_idx csr_ncols, + value_idx nnz, + cudaStream_t stream) +{ + detail::csr_transpose(handle.get_cusparse_handle(), + csr_indptr, + csr_indices, + csr_data, + csc_indptr, + csc_indices, + csc_data, + csr_nrows, + csr_ncols, + nnz, + stream); +} + +}; // end NAMESPACE linalg +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/transpose.hpp b/cpp/include/raft/sparse/linalg/transpose.hpp index 6e40b647e9..0aea254803 100644 --- a/cpp/include/raft/sparse/linalg/transpose.hpp +++ b/cpp/include/raft/sparse/linalg/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __TRANSPOSE_H +#define __TRANSPOSE_H #pragma once @@ -68,3 +75,5 @@ void csr_transpose(const raft::handle_t& handle, }; // end NAMESPACE linalg }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/mst/mst.cuh b/cpp/include/raft/sparse/mst/mst.cuh index b49003467b..70a6ff521f 100644 --- a/cpp/include/raft/sparse/mst/mst.cuh +++ b/cpp/include/raft/sparse/mst/mst.cuh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#ifndef __MST_H +#define __MST_H #pragma once @@ -51,3 +53,5 @@ raft::Graph_COO mst(const raft::handle_t& handle, } // namespace mst } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/mst/mst.hpp b/cpp/include/raft/sparse/mst/mst.hpp new file mode 100644 index 0000000000..ac4cf21b64 --- /dev/null +++ b/cpp/include/raft/sparse/mst/mst.hpp @@ -0,0 +1,63 @@ + +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __MST_H +#define __MST_H + +#pragma once + +#include "mst_solver.cuh" + +namespace raft { +namespace mst { + +template +raft::Graph_COO mst(const raft::handle_t& handle, + edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + vertex_t const v, + edge_t const e, + vertex_t* color, + cudaStream_t stream, + bool symmetrize_output = true, + bool initialize_colors = true, + int iterations = 0) +{ + MST_solver mst_solver(handle, + offsets, + indices, + weights, + v, + e, + color, + stream, + symmetrize_output, + initialize_colors, + iterations); + return mst_solver.solve(); +} + +} // namespace mst +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 80a6584251..4e4e76946c 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ #include #include -#include +#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index 988f478f2b..eb747cce1e 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/op/detail/row_op.cuh b/cpp/include/raft/sparse/op/detail/row_op.cuh index 4754f753d4..63c8cafaa7 100644 --- a/cpp/include/raft/sparse/op/detail/row_op.cuh +++ b/cpp/include/raft/sparse/op/detail/row_op.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/op/detail/slice.h b/cpp/include/raft/sparse/op/detail/slice.cuh similarity index 97% rename from cpp/include/raft/sparse/op/detail/slice.h rename to cpp/include/raft/sparse/op/detail/slice.cuh index e3c0f09e14..6bf6688076 100644 --- a/cpp/include/raft/sparse/op/detail/slice.h +++ b/cpp/include/raft/sparse/op/detail/slice.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h
index 9fc7cac5e3..a8b8161716 100644
--- a/cpp/include/raft/sparse/op/detail/sort.h
+++ b/cpp/include/raft/sparse/op/detail/sort.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh
new file mode 100644
index 0000000000..6c36538137
--- /dev/null
+++ b/cpp/include/raft/sparse/op/filter.cuh
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __FILTER_H
+#define __FILTER_H
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace raft {
+namespace sparse {
+namespace op {
+
+/**
+ * @brief Removes the values matching a particular scalar from a COO formatted sparse matrix.
+ *
+ * @param rows: input array of rows (size nnz)
+ * @param cols: input array of cols (size nnz)
+ * @param vals: input array of vals (size nnz)
+ * @param nnz: size of current rows/cols/vals arrays
+ * @param crows: compressed array of rows
+ * @param ccols: compressed array of cols
+ * @param cvals: compressed array of vals
+ * @param cnnz: array of non-zero counts per row
+ * @param cur_cnnz: array of current non-zero counts per row
+ * @param scalar: scalar to remove from arrays
+ * @param n: number of rows in dense matrix
+ * @param stream: cuda stream to use
+ */
+template
+void coo_remove_scalar(const int* rows,
+                       const int* cols,
+                       const T* vals,
+                       int nnz,
+                       int* crows,
+                       int* ccols,
+                       T* cvals,
+                       int* cnnz,
+                       int* cur_cnnz,
+                       T scalar,
+                       int n,
+                       cudaStream_t stream)
+{
+  detail::coo_remove_scalar<128, T>(
+    rows, cols, vals, nnz, crows, ccols, cvals, cnnz, cur_cnnz, scalar, n, stream);
+}
+
+/**
+ * @brief Removes the values matching a particular scalar from a COO formatted sparse matrix.
+ *
+ * @param in: input COO matrix
+ * @param out: output COO matrix
+ * @param scalar: scalar to remove from arrays
+ * @param stream: cuda stream to use
+ */
+template
+void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream)
+{
+  detail::coo_remove_scalar<128, T>(in, out, scalar, stream);
+}
+
+/**
+ * @brief Removes zeros from a COO formatted sparse matrix.
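+ *
+ * This is a convenience wrapper around coo_remove_scalar() with a scalar of
+ * T(0.0), as the body below shows. A minimal call sketch (illustrative only;
+ * `in`, `out`, and `stream` are assumed to be set up by the caller):
+ * @code
+ *   raft::sparse::op::coo_remove_zeros(&in, &out, stream);
+ * @endcode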
+ *
+ * @param in: input COO matrix
+ * @param out: output COO matrix
+ * @param stream: cuda stream to use
+ */
+template
+void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream)
+{
+  coo_remove_scalar(in, out, T(0.0), stream);
+}
+
+}; // namespace op
+}; // end NAMESPACE sparse
+}; // end NAMESPACE raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/op/filter.hpp b/cpp/include/raft/sparse/op/filter.hpp
index 0dff063e91..b67084f18a 100644
--- a/cpp/include/raft/sparse/op/filter.hpp
+++ b/cpp/include/raft/sparse/op/filter.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __FILTER_H
+#define __FILTER_H
 
 #pragma once
 
@@ -88,3 +95,5 @@ void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream)
 }; // namespace op
 }; // end NAMESPACE sparse
 }; // end NAMESPACE raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/op/reduce.cuh b/cpp/include/raft/sparse/op/reduce.cuh
new file mode 100644
index 0000000000..fd860d2dc1
--- /dev/null
+++ b/cpp/include/raft/sparse/op/reduce.cuh
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __SPARSE_REDUCE_H
+#define __SPARSE_REDUCE_H
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace raft {
+namespace sparse {
+namespace op {
+/**
+ * Computes a mask from a sorted COO matrix where 0's denote
+ * duplicate values and 1's denote new values. This mask can
+ * be useful for computing an exclusive scan to pre-build offsets
+ * for reducing duplicates, such as when symmetrizing
+ * or taking the min of each duplicated value.
+ *
+ * Note that this function always marks the first value as 0 so that
+ * a cumulative sum can be performed as a follow-on. However, even
+ * if the mask is used directly, any duplicate still receives a 1
+ * when it is first encountered; only this convention keeps the
+ * first element from being a 1 as well.
+ *
+ * @tparam value_idx
+ * @param[out] mask output mask, size nnz
+ * @param[in] rows COO rows array, size nnz
+ * @param[in] cols COO cols array, size nnz
+ * @param[in] nnz number of nonzeros in input arrays
+ * @param[in] stream cuda ops will be ordered with respect to this stream
+ */
+template
+void compute_duplicates_mask(
+  value_idx* mask, const value_idx* rows, const value_idx* cols, size_t nnz, cudaStream_t stream)
+{
+  detail::compute_duplicates_mask(mask, rows, cols, nnz, stream);
+}
+
+/**
+ * Performs a COO reduce of duplicate columns per row, taking the max weight
+ * for duplicate columns in each row.
This function assumes the input COO
+ * has been sorted by both row and column but makes no assumption on
+ * the sorting of values.
+ * @tparam value_idx
+ * @tparam value_t
+ * @param[in] handle raft handle
+ * @param[out] out output COO, the nnz will be computed and allocate() will be called in this function.
+ * @param[in] rows COO rows array, size nnz
+ * @param[in] cols COO cols array, size nnz
+ * @param[in] vals COO vals array, size nnz
+ * @param[in] nnz number of nonzeros in COO input arrays
+ * @param[in] m number of rows in COO input matrix
+ * @param[in] n number of columns in COO input matrix
+ */
+template
+void max_duplicates(const raft::handle_t& handle,
+                    raft::sparse::COO& out,
+                    const value_idx* rows,
+                    const value_idx* cols,
+                    const value_t* vals,
+                    size_t nnz,
+                    size_t m,
+                    size_t n)
+{
+  detail::max_duplicates(handle, out, rows, cols, vals, nnz, m, n);
+}
+}; // END namespace op
+}; // END namespace sparse
+}; // END namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/op/reduce.hpp b/cpp/include/raft/sparse/op/reduce.hpp
index b181f1c46f..a7e771d157 100644
--- a/cpp/include/raft/sparse/op/reduce.hpp
+++ b/cpp/include/raft/sparse/op/reduce.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __SPARSE_REDUCE_H
+#define __SPARSE_REDUCE_H
 
 #pragma once
 
@@ -81,3 +88,5 @@ void max_duplicates(const raft::handle_t& handle,
 }; // END namespace op
 }; // END namespace sparse
 }; // END namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/op/row_op.cuh b/cpp/include/raft/sparse/op/row_op.cuh
new file mode 100644
index 0000000000..b31d3f29b6
--- /dev/null
+++ b/cpp/include/raft/sparse/op/row_op.cuh
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __SPARSE_ROW_OP_H
+#define __SPARSE_ROW_OP_H
+#pragma once
+
+#include
+#include
+
+namespace raft {
+namespace sparse {
+namespace op {
+
+/**
+ * @brief Perform a custom row operation on a CSR matrix in batches.
+ * @tparam Index_ numerical type of the row_ind array
+ * @tparam TPB_X number of threads per block to use for underlying kernel
+ * @tparam Lambda type of custom operation function
+ * @param row_ind the CSR row_ind array to perform parallel operations over
+ * @param n_rows total number of vertices in graph
+ * @param nnz number of non-zeros
+ * @param op custom row operation functor accepting the row and beginning index.
+ * @param stream cuda stream to use + */ +template void> +void csr_row_op(const Index_* row_ind, Index_ n_rows, Index_ nnz, Lambda op, cudaStream_t stream) +{ + detail::csr_row_op(row_ind, n_rows, nnz, op, stream); +} + +}; // namespace op +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/row_op.hpp b/cpp/include/raft/sparse/op/row_op.hpp index 5dc115cfce..b3eafafa66 100644 --- a/cpp/include/raft/sparse/op/row_op.hpp +++ b/cpp/include/raft/sparse/op/row_op.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_ROW_OP_H +#define __SPARSE_ROW_OP_H #pragma once @@ -43,3 +50,5 @@ void csr_row_op(const Index_* row_ind, Index_ n_rows, Index_ nnz, Lambda op, cud }; // namespace op }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/slice.cuh b/cpp/include/raft/sparse/op/slice.cuh new file mode 100644 index 0000000000..cd7be1924b --- /dev/null +++ b/cpp/include/raft/sparse/op/slice.cuh @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __SLICE_H +#define __SLICE_H + +#pragma once + +#include +#include + +namespace raft { +namespace sparse { +namespace op { + +/** + * Slice consecutive rows from a CSR array and populate newly sliced indptr array + * @tparam value_idx + * @param[in] start_row : beginning row to slice + * @param[in] stop_row : ending row to slice + * @param[in] indptr : indptr of input CSR to slice + * @param[out] indptr_out : output sliced indptr to populate + * @param[in] start_offset : beginning column offset of input indptr + * @param[in] stop_offset : ending column offset of input indptr + * @param[in] stream : cuda stream for ordering events + */ +template +void csr_row_slice_indptr(value_idx start_row, + value_idx stop_row, + const value_idx* indptr, + value_idx* indptr_out, + value_idx* start_offset, + value_idx* stop_offset, + cudaStream_t stream) +{ + detail::csr_row_slice_indptr( + start_row, stop_row, indptr, indptr_out, start_offset, stop_offset, stream); +} + +/** + * Slice rows from a CSR, populate column and data arrays + * @tparam value_idx : data type of CSR index arrays + * @tparam value_t : data type of CSR data array + * @param[in] start_offset : beginning column offset to slice + * @param[in] stop_offset : ending column offset to slice + * @param[in] indices : column indices array from input CSR + * @param[in] data : data array from input CSR + * @param[out] indices_out : output column indices array + * @param[out] data_out : output data array + * @param[in] stream : cuda stream for ordering events + */ +template +void csr_row_slice_populate(value_idx start_offset, + value_idx stop_offset, + const value_idx* indices, + const value_t* data, + value_idx* indices_out, + value_t* data_out, + cudaStream_t stream) +{ + detail::csr_row_slice_populate( + start_offset, stop_offset, indices, data, indices_out, data_out, stream); +} + +}; // namespace op +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/slice.hpp b/cpp/include/raft/sparse/op/slice.hpp index 917233319c..b4e0622ced 100644 --- a/cpp/include/raft/sparse/op/slice.hpp +++ b/cpp/include/raft/sparse/op/slice.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,11 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SLICE_H +#define __SLICE_H #pragma once #include -#include +#include namespace raft { namespace sparse { @@ -75,3 +82,5 @@ void csr_row_slice_populate(value_idx start_offset, }; // namespace op }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh new file mode 100644 index 0000000000..ae0e587c3b --- /dev/null +++ b/cpp/include/raft/sparse/op/sort.cuh @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __SPARSE_SORT_H +#define __SPARSE_SORT_H + +#pragma once + +#include +#include + +namespace raft { +namespace sparse { +namespace op { + +/** + * @brief Sorts the arrays that comprise the coo matrix + * by row and then by column. + * + * @param m number of rows in coo matrix + * @param n number of cols in coo matrix + * @param nnz number of non-zeros + * @param rows rows array from coo matrix + * @param cols cols array from coo matrix + * @param vals vals array from coo matrix + * @param stream: cuda stream to use + */ +template +void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t stream) +{ + detail::coo_sort(m, n, nnz, rows, cols, vals, stream); +} + +/** + * @brief Sort the underlying COO arrays by row + * @tparam T: the type name of the underlying value array + * @param in: COO to sort by row + * @param stream: the cuda stream to use + */ +template +void coo_sort(COO* const in, cudaStream_t stream) +{ + coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); +} + +/** + * Sorts a COO by its weight + * @tparam value_idx + * @tparam value_t + * @param[inout] rows source edges + * @param[inout] cols dest edges + * @param[inout] data edge weights + * @param[in] nnz number of edges in edge list + * @param[in] stream cuda stream for which to order cuda operations + */ +template +void coo_sort_by_weight( + value_idx* rows, value_idx* cols, value_t* data, value_idx nnz, cudaStream_t stream) +{ + detail::coo_sort_by_weight(rows, cols, data, nnz, stream); +} +}; // namespace op +}; // end NAMESPACE sparse +}; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/op/sort.hpp b/cpp/include/raft/sparse/op/sort.hpp index eb5c716976..12a4a77ca9 100644 --- a/cpp/include/raft/sparse/op/sort.hpp +++ b/cpp/include/raft/sparse/op/sort.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __SPARSE_SORT_H +#define __SPARSE_SORT_H #pragma once @@ -72,3 +79,5 @@ void coo_sort_by_weight( }; // namespace op }; // end NAMESPACE sparse }; // end NAMESPACE raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh new file mode 100644 index 0000000000..28bb5aa74b --- /dev/null +++ b/cpp/include/raft/sparse/selection/connect_components.cuh @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __CONNECT_COMPONENTS_H
+#define __CONNECT_COMPONENTS_H
+
+#include
+#include
+#include
+
+namespace raft {
+namespace linkage {
+
+template
+using FixConnectivitiesRedOp = detail::FixConnectivitiesRedOp;
+
+/**
+ * Gets the number of unique components from array of
+ * colors or labels. This does not assume the components are
+ * drawn from a monotonically increasing set.
+ * @tparam value_idx
+ * @param[in] colors array of components
+ * @param[in] n_rows size of components array
+ * @param[in] stream cuda stream for which to order cuda operations
+ * @return total number of components
+ */
+template
+value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream)
+{
+  return detail::get_n_components(colors, n_rows, stream);
+}
+
+/**
+ * Connects the components of an otherwise unconnected knn graph
+ * by computing a 1-nn to neighboring components of each data point
+ * (i.e. component(nn) != component(self)) and reducing the results to
+ * include the set of smallest destination components for each source
+ * component. The result will not necessarily contain
+ * n_components^2 - n_components elements because many components
+ * will likely not be contained in the neighborhoods of 1-nns.
+ * @tparam value_idx
+ * @tparam value_t
+ * @param[in] handle raft handle
+ * @param[out] out output edge list containing nearest cross-component
+ *             edges.
+ * @param[in] X original (row-major) dense matrix for which knn graph should be constructed.
+ * @param[in] orig_colors array containing component number for each row of X
+ * @param[in] n_rows number of rows in X
+ * @param[in] n_cols number of cols in X
+ * @param[in] reduction_op reduction operation to use when resolving the
+ *            1-nn distances across components (e.g. a FixConnectivitiesRedOp)
+ * @param[in] metric distance metric to use when computing the 1-nns
+ */
+template
+void connect_components(
+  const raft::handle_t& handle,
+  raft::sparse::COO& out,
+  const value_t* X,
+  const value_idx* orig_colors,
+  size_t n_rows,
+  size_t n_cols,
+  red_op reduction_op,
+  raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded)
+{
+  detail::connect_components(handle, out, X, orig_colors, n_rows, n_cols, reduction_op, metric);
+}
+
+}; // end namespace linkage
+}; // end namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/selection/connect_components.hpp b/cpp/include/raft/sparse/selection/connect_components.hpp
index 23d247b50e..83d8fce8ba 100644
--- a/cpp/include/raft/sparse/selection/connect_components.hpp
+++ b/cpp/include/raft/sparse/selection/connect_components.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
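+ *
+ * Migrating only requires swapping the extension in the include, e.g.
+ * @code
+ * #include <raft/sparse/selection/connect_components.cuh>
+ * @endcode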
+ */ + +#ifndef __CONNECT_COMPONENTS_H +#define __CONNECT_COMPONENTS_H #include #include @@ -76,3 +83,5 @@ void connect_components( }; // end namespace linkage }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/selection/detail/connect_components.cuh b/cpp/include/raft/sparse/selection/detail/connect_components.cuh index 2b9ca2d8b5..9cfa2bbd44 100644 --- a/cpp/include/raft/sparse/selection/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/detail/connect_components.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,13 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include +#include #include diff --git a/cpp/include/raft/sparse/selection/detail/knn.cuh b/cpp/include/raft/sparse/selection/detail/knn.cuh index d263f2409f..b1dd6116e7 100644 --- a/cpp/include/raft/sparse/selection/detail/knn.cuh +++ b/cpp/include/raft/sparse/selection/detail/knn.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,15 +21,15 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/raft/sparse/selection/detail/knn_graph.cuh b/cpp/include/raft/sparse/selection/detail/knn_graph.cuh index b222dfd9bd..32b7fd3c63 100644 --- a/cpp/include/raft/sparse/selection/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/selection/detail/knn_graph.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,9 +20,9 @@ #include #include -#include +#include -#include +#include #include #include diff --git a/cpp/include/raft/sparse/selection/knn.cuh b/cpp/include/raft/sparse/selection/knn.cuh new file mode 100644 index 0000000000..fd9ab4ac3d --- /dev/null +++ b/cpp/include/raft/sparse/selection/knn.cuh @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef __SPARSE_KNN_H
+#define __SPARSE_KNN_H
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace raft {
+namespace sparse {
+namespace selection {
+
+/**
+ * Search the sparse kNN index for the k-nearest neighbors of a set of sparse query vectors
+ * using some distance implementation
+ * @param[in] idxIndptr csr indptr of the index matrix (size n_idx_rows + 1)
+ * @param[in] idxIndices csr column indices array of the index matrix (size idxNNZ)
+ * @param[in] idxData csr data array of the index matrix (size idxNNZ)
+ * @param[in] idxNNZ number of non-zeros for sparse index matrix
+ * @param[in] n_idx_rows number of data samples in index matrix
+ * @param[in] n_idx_cols number of features in index matrix
+ * @param[in] queryIndptr csr indptr of the query matrix (size n_query_rows + 1)
+ * @param[in] queryIndices csr indices array of the query matrix (size queryNNZ)
+ * @param[in] queryData csr data array of the query matrix (size queryNNZ)
+ * @param[in] queryNNZ number of non-zeros for sparse query matrix
+ * @param[in] n_query_rows number of data samples in query matrix
+ * @param[in] n_query_cols number of features in query matrix
+ * @param[out] output_indices dense matrix for output indices (size n_query_rows * k)
+ * @param[out] output_dists dense matrix for output distances (size n_query_rows * k)
+ * @param[in] k the number of neighbors to query
+ * @param[in] handle raft handle; operations are ordered with respect to handle.get_stream()
+ * @param[in] batch_size_index maximum number of rows to use from index matrix per batch
+ * @param[in] batch_size_query maximum number of rows to use from query matrix per batch
+ * @param[in] metric distance metric/measure to use
+ * @param[in] metricArg potential argument for metric (currently unused)
+ */
+template
+void brute_force_knn(const value_idx* idxIndptr,
+                     const value_idx* idxIndices,
+                     const value_t* idxData,
+                     size_t idxNNZ,
+                     int n_idx_rows,
+                     int n_idx_cols,
+                     const value_idx* queryIndptr,
+                     const value_idx* queryIndices,
+                     const value_t* queryData,
+                     size_t queryNNZ,
+                     int n_query_rows,
+                     int n_query_cols,
+                     value_idx* output_indices,
+                     value_t* output_dists,
+                     int k,
+                     const raft::handle_t& handle,
+                     size_t batch_size_index = 2 << 14,  // 32k
+                     size_t batch_size_query = 2 << 14,
+                     raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded,
+                     float metricArg = 0)
+{
+  detail::sparse_knn_t(idxIndptr,
+                       idxIndices,
+                       idxData,
+                       idxNNZ,
+                       n_idx_rows,
+                       n_idx_cols,
+                       queryIndptr,
+                       queryIndices,
+                       queryData,
+                       queryNNZ,
+                       n_query_rows,
+                       n_query_cols,
+                       output_indices,
+                       output_dists,
+                       k,
+                       handle,
+                       batch_size_index,
+                       batch_size_query,
+                       metric,
+                       metricArg)
+    .run();
+}
+
+}; // namespace selection
+}; // namespace sparse
+}; // namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/sparse/selection/knn.hpp b/cpp/include/raft/sparse/selection/knn.hpp
index 8b2747d104..4158bd40c2 100644
--- a/cpp/include/raft/sparse/selection/knn.hpp
+++ b/cpp/include/raft/sparse/selection/knn.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */ + +#ifndef __SPARSE_KNN_H +#define __SPARSE_KNN_H #pragma once @@ -96,3 +103,5 @@ void brute_force_knn(const value_idx* idxIndptr, }; // namespace selection }; // namespace sparse }; // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/selection/knn_graph.cuh b/cpp/include/raft/sparse/selection/knn_graph.cuh new file mode 100644 index 0000000000..7d342db43b --- /dev/null +++ b/cpp/include/raft/sparse/selection/knn_graph.cuh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __KNN_GRAPH_H +#define __KNN_GRAPH_H + +#pragma once + +#include +#include +#include + +#include + +namespace raft { +namespace sparse { +namespace selection { + +/** + * Constructs a (symmetrized) knn graph edge list from + * dense input vectors. + * + * Note: The resulting KNN graph is not guaranteed to be connected. + * + * @tparam value_idx + * @tparam value_t + * @param[in] handle raft handle + * @param[in] X dense matrix of input data samples and observations + * @param[in] m number of data samples (rows) in X + * @param[in] n number of observations (columns) in X + * @param[in] metric distance metric to use when constructing neighborhoods + * @param[out] out output edge list + * @param c + */ +template +void knn_graph(const handle_t& handle, + const value_t* X, + std::size_t m, + std::size_t n, + raft::distance::DistanceType metric, + raft::sparse::COO& out, + int c = 15) +{ + detail::knn_graph(handle, X, m, n, metric, out, c); +} + +}; // namespace selection +}; // namespace sparse +}; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/selection/knn_graph.hpp b/cpp/include/raft/sparse/selection/knn_graph.hpp index 825761d44d..eb035390ce 100644 --- a/cpp/include/raft/sparse/selection/knn_graph.hpp +++ b/cpp/include/raft/sparse/selection/knn_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __KNN_GRAPH_H +#define __KNN_GRAPH_H #pragma once @@ -57,3 +64,5 @@ void knn_graph(const handle_t& handle, }; // namespace selection }; // namespace sparse }; // end namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/spatial/knn/ann.cuh b/cpp/include/raft/spatial/knn/ann.cuh new file mode 100644 index 0000000000..2ef2ae0fa4 --- /dev/null +++ b/cpp/include/raft/spatial/knn/ann.cuh @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ANN_H
+#define __ANN_H
+
+#pragma once
+
+#include "ann_common.h"
+#include "detail/ann_quantized_faiss.cuh"
+
+#include
+#include
+
+namespace raft {
+namespace spatial {
+namespace knn {
+
+/**
+ * @brief Flat C++ API function to build an approximate nearest neighbors index
+ * from an index array and a set of parameters.
+ *
+ * @param[in] handle RAFT handle
+ * @param[out] index index to be built
+ * @param[in] params parametrization of the index to be built
+ * @param[in] metric distance metric to use. Euclidean (L2) is used by default
+ * @param[in] metricArg metric argument
+ * @param[in] index_array the index array to build the index with
+ * @param[in] n number of rows in the index array
+ * @param[in] D the dimensionality of the index array
+ */
+template
+inline void approx_knn_build_index(raft::handle_t& handle,
+                                   raft::spatial::knn::knnIndex* index,
+                                   knnIndexParam* params,
+                                   raft::distance::DistanceType metric,
+                                   float metricArg,
+                                   float* index_array,
+                                   value_idx n,
+                                   value_idx D)
+{
+  detail::approx_knn_build_index(handle, index, params, metric, metricArg, index_array, n, D);
+}
+
+/**
+ * @brief Flat C++ API function to perform an approximate nearest neighbors
+ * search from a previously built index and a query array
+ *
+ * @param[in] handle RAFT handle
+ * @param[out] distances distances of the nearest neighbors toward
+ * their query point
+ * @param[out] indices indices of the nearest neighbors
+ * @param[in] index index to perform a search with
+ * @param[in] k the number of nearest neighbors to search for
+ * @param[in] query_array the query to perform a search with
+ * @param[in] n number of rows in the query array
+ */
+template
+inline void approx_knn_search(raft::handle_t& handle,
+                              float* distances,
+                              int64_t* indices,
+                              raft::spatial::knn::knnIndex* index,
+                              value_idx k,
+                              float* query_array,
+                              value_idx n)
+{
+  detail::approx_knn_search(handle, distances, indices, index, k, query_array, n);
+}
+
+} // namespace knn
+} // namespace spatial
+} // namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/spatial/knn/ann.hpp b/cpp/include/raft/spatial/knn/ann.hpp
index 5f64a8d1b5..bb11a2b11b 100644
--- a/cpp/include/raft/spatial/knn/ann.hpp
+++ b/cpp/include/raft/spatial/knn/ann.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,6 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __ANN_H
+#define __ANN_H
 
 #pragma once
 
@@ -80,3 +87,5 @@ inline void approx_knn_search(raft::handle_t& handle,
 } // namespace knn
 } // namespace spatial
 } // namespace raft
+
+#endif
\ No newline at end of file
diff --git a/cpp/include/raft/spatial/knn/ann_common.h b/cpp/include/raft/spatial/knn/ann_common.h
index 339ca3687a..5cdd6b1141 100644
--- a/cpp/include/raft/spatial/knn/ann_common.h
+++ b/cpp/include/raft/spatial/knn/ann_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/include/raft/spatial/knn/ball_cover.cuh b/cpp/include/raft/spatial/knn/ball_cover.cuh
new file mode 100644
index 0000000000..df797ecca2
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/ball_cover.cuh
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BALL_COVER_H
+#define __BALL_COVER_H
+
+#pragma once
+
+#include
+
+#include "ball_cover_common.h"
+#include "detail/ball_cover.cuh"
+#include "detail/ball_cover/common.cuh"
+#include
+#include
+
+namespace raft {
+namespace spatial {
+namespace knn {
+
+/**
+ * Builds and populates a previously unbuilt BallCoverIndex
+ */
+template
+void rbc_build_index(const raft::handle_t& handle,
+                     BallCoverIndex& index)
+{
+  ASSERT(index.n == 2, "Random ball cover currently only works in 2-dimensions");
+  if (index.metric == raft::distance::DistanceType::Haversine) {
+    detail::rbc_build_index(handle, index, detail::HaversineFunc());
+  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
+             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
+    detail::rbc_build_index(handle, index, detail::EuclideanFunc());
+  } else {
+    RAFT_FAIL("Metric not supported");
+  }
+
+  index.set_index_trained();
+}
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * performs an all neighbors knn, which can reuse memory when
+ * the index and query are the same array. This function will
+ * build the index and assumes rbc_build_index() has not already
+ * been called.
+ * @tparam value_idx knn index type
+ * @tparam value_t knn distance type
+ * @tparam value_int type for integers, such as number of rows/cols
+ * @param handle raft handle for resource management
+ * @param index ball cover index which has not yet been built
+ * @param k number of nearest neighbors to find
+ * @param[out] inds output knn indices
+ * @param[out] dists output knn distances
+ * @param perform_post_filtering if this is false, only the closest k landmarks
+ * are considered (which will return approximate
+ * results).
+ * @param weight a weight for overlap between the closest landmark and
+ * the radius of other landmarks when pruning distances.
+ * Setting this value below 1 can effectively turn off
+ * computing distances against many other balls, enabling
+ * approximate nearest neighbors. Recall can be adjusted
+ * based on how many relevant balls are ignored. Note that
+ * many datasets can still have great recall even by only
+ * looking in the closest landmark.
+ */
+template
+void rbc_all_knn_query(const raft::handle_t& handle,
+                       BallCoverIndex& index,
+                       value_int k,
+                       value_idx* inds,
+                       value_t* dists,
+                       bool perform_post_filtering = true,
+                       float weight = 1.0)
+{
+  ASSERT(index.n == 2, "Random ball cover currently only works in 2-dimensions");
+  if (index.metric == raft::distance::DistanceType::Haversine) {
+    detail::rbc_all_knn_query(handle,
+                              index,
+                              k,
+                              inds,
+                              dists,
+                              detail::HaversineFunc(),
+                              perform_post_filtering,
+                              weight);
+  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
+             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
+    detail::rbc_all_knn_query(handle,
+                              index,
+                              k,
+                              inds,
+                              dists,
+                              detail::EuclideanFunc(),
+                              perform_post_filtering,
+                              weight);
+  } else {
+    RAFT_FAIL("Metric not supported");
+  }
+
+  index.set_index_trained();
+}
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * function does not build the index and assumes rbc_build_index() has
+ * already been called. Use this function when the index and
+ * query arrays are different, otherwise use rbc_all_knn_query().
+ * @tparam value_idx index type
+ * @tparam value_t distances type
+ * @tparam value_int integer type for size info
+ * @param handle raft handle for resource management
+ * @param index ball cover index which has already been built
+ * @param k number of nearest neighbors to find
+ * @param query the query data points
+ * @param perform_post_filtering if this is false, only the closest k landmarks
+ * are considered (which will return approximate
+ * results).
+ * @param[out] inds output knn indices
+ * @param[out] dists output knn distances
+ * @param weight a weight for overlap between the closest landmark and
+ * the radius of other landmarks when pruning distances.
+ * Setting this value below 1 can effectively turn off
+ * computing distances against many other balls, enabling
+ * approximate nearest neighbors. Recall can be adjusted
+ * based on how many relevant balls are ignored. Note that
+ * many datasets can still have great recall even by only
+ * looking in the closest landmark.
+ * @param[in] n_query_pts number of query points + */ +template +void rbc_knn_query(const raft::handle_t& handle, + BallCoverIndex& index, + value_int k, + const value_t* query, + value_int n_query_pts, + value_idx* inds, + value_t* dists, + bool perform_post_filtering = true, + float weight = 1.0) +{ + ASSERT(index.n == 2, "Random ball cover currently only works in 2-dimensions"); + if (index.metric == raft::distance::DistanceType::Haversine) { + detail::rbc_knn_query(handle, + index, + k, + query, + n_query_pts, + inds, + dists, + detail::HaversineFunc(), + perform_post_filtering, + weight); + } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) { + detail::rbc_knn_query(handle, + index, + k, + query, + n_query_pts, + inds, + dists, + detail::EuclideanFunc(), + perform_post_filtering, + weight); + } else { + RAFT_FAIL("Metric not supported"); + } +} + +// TODO: implement functions for: +// 4. rbc_eps_neigh() - given a populated index, perform query against different query array +// 5. rbc_all_eps_neigh() - populate a BallCoverIndex and query against training data + +} // namespace knn +} // namespace spatial +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/spatial/knn/ball_cover.hpp b/cpp/include/raft/spatial/knn/ball_cover.hpp index d44e87710b..26c2c1fb2e 100644 --- a/cpp/include/raft/spatial/knn/ball_cover.hpp +++ b/cpp/include/raft/spatial/knn/ball_cover.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @warning This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ + +#ifndef __BALL_COVER_H +#define __BALL_COVER_H #pragma once @@ -185,3 +192,5 @@ void rbc_knn_query(const raft::handle_t& handle, } // namespace knn } // namespace spatial } // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/spatial/knn/ball_cover_common.h b/cpp/include/raft/spatial/knn/ball_cover_common.h index e1a202107b..0567e124d9 100644 --- a/cpp/include/raft/spatial/knn/ball_cover_common.h +++ b/cpp/include/raft/spatial/knn/ball_cover_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh index 4d9bfd82ad..78631b431f 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ #include #include