Approximate Nearest Neighbors #2780

Merged

Commits (30 total; this view shows changes from 13 commits)
4124e5a - Multiple KNN strategies (implementing PQ) (viclafargue, Sep 1, 2020)
132acab - Multiple improvements (viclafargue, Sep 2, 2020)
fb82831 - Adding nprobe parameter (viclafargue, Sep 3, 2020)
729b3a4 - Adding support for GpuIndexIVFFlat and GpuIndexIVFScalarQuantizer (viclafargue, Sep 4, 2020)
2fa57fa - Completing documentation (viclafargue, Sep 4, 2020)
5c44a18 - Small fixes (viclafargue, Sep 7, 2020)
8a46e42 - Merge branch 'branch-0.16' into fea-multiple-knn-strategies (viclafargue, Sep 7, 2020)
9c05d87 - Adding test (viclafargue, Sep 8, 2020)
fa7d004 - Improving tests (viclafargue, Sep 8, 2020)
2c66690 - Corrections & improvements (viclafargue, Sep 9, 2020)
51362a7 - Check style (viclafargue, Sep 9, 2020)
5837cc8 - Update changelog (viclafargue, Sep 9, 2020)
49bb435 - Adding include (viclafargue, Sep 9, 2020)
0d3cfe1 - Merge branch-0.16 (viclafargue, Sep 24, 2020)
f39cc21 - First part of requested changes (viclafargue, Sep 24, 2020)
1a3af42 - ANN parameters creation in separate file (viclafargue, Sep 25, 2020)
636ce58 - Updating ANN methods documentation (viclafargue, Sep 25, 2020)
71b84f9 - Merge branch-0.17 (viclafargue, Oct 30, 2020)
bbff2c2 - update related to raft (viclafargue, Nov 2, 2020)
1ef5fd5 - Merge branch 'branch-0.17' into fea-multiple-knn-strategies (viclafargue, Nov 2, 2020)
1bb95df - Update changelog (viclafargue, Nov 2, 2020)
5eb0060 - Merge branch-0.18 (viclafargue, Dec 8, 2020)
046a127 - Automated parameter determination to Python code (viclafargue, Dec 8, 2020)
c60d54c - Update changelog according to PR name (viclafargue, Dec 8, 2020)
96bddef - Lower values for ivfpq testing + testing trim down (viclafargue, Dec 9, 2020)
8775a42 - Merge branch 'branch-0.18' into fea-multiple-knn-strategies (viclafargue, Dec 10, 2020)
6a97101 - Update ivfpq test (viclafargue, Dec 10, 2020)
7e8ee31 - Force index memory release in tests (viclafargue, Dec 11, 2020)
937e799 - Merge branch 'branch-0.18' into fea-multiple-knn-strategies (viclafargue, Dec 11, 2020)
902849d - Removing changelog update (cjnolet, Dec 11, 2020)
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

## New Features
- PR #2698: Distributed TF-IDF transformer
- PR #2780: Multiple KNN strategies

## Improvements
- PR #2796: Remove tokens of length 1 by default for text vectorizers
89 changes: 71 additions & 18 deletions cpp/include/cuml/neighbors/knn.hpp
@@ -16,6 +16,8 @@

#pragma once

#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <common/cumlHandle.hpp>
#include <cuml/common/logger.hpp>

@@ -36,36 +38,87 @@ enum MetricType {
METRIC_Correlation
};

struct knnIndex {
  faiss::gpu::StandardGpuResources *gpu_res;
  faiss::gpu::GpuIndex *index;
  int device;
  ~knnIndex() {
    delete gpu_res;
    delete index;
  }
};

[Review comment from a maintainer on the gpu_res member: Either before or shortly after this PR is merged, we need to update FAISS in cuML and use their new pluggable memory manager feature (facebookresearch/faiss#1203). While the brute-force computation uses only a very small workspace, the approximate Index variants put FAISS in complete control of the memory space of the index (through the StandardGPUResources).]

typedef enum {
QT_8bit,
QT_4bit,
QT_8bit_uniform,
QT_4bit_uniform,
QT_fp16,
QT_8bit_direct,
QT_6bit
} QuantizerType;

struct knnIndexParam {
bool automated;
virtual bool isBaseClass() { return true; }
};

struct IVFParam : knnIndexParam {
int nlist;
int nprobe;
};

struct IVFFlatParam : IVFParam {};

struct IVFPQParam : IVFParam {
int M;
int n_bits;
bool usePrecomputedTables;
};

struct IVFSQParam : IVFParam {
QuantizerType qtype;
bool encodeResidual;
};
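
To make the parameter hierarchy concrete, here is a minimal sketch of how a caller might fill these structs before building an index. It is illustrative only: the field values are arbitrary placeholders, not tuned recommendations from this PR.

// Illustrative sketch only: example values, not recommendations.
ML::IVFPQParam pq_params;
pq_params.automated = false;   // caller sets everything explicitly
pq_params.nlist = 1024;        // number of inverted lists (coarse cells)
pq_params.nprobe = 32;         // lists scanned per query
pq_params.M = 16;              // subquantizer count (must divide D)
pq_params.n_bits = 8;          // bits per subquantizer code
pq_params.usePrecomputedTables = false;

ML::IVFFlatParam flat_params;
flat_params.automated = true;  // nlist/nprobe derived at build time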

/**
 * @brief Flat C++ API function to perform a brute force knn on
 * a series of input arrays and combine the results into a single
 * output array for indexes and distances.
 *
 * @param[in] handle the cuml handle to use
 * @param[in] input vector of pointers to the input arrays
 * @param[in] sizes vector of sizes of input arrays
 * @param[in] D the dimensionality of the arrays
 * @param[in] search_items array of items to search of dimensionality D
 * @param[in] n number of rows in search_items
 * @param[out] res_I the resulting index array of size n * k
 * @param[out] res_D the resulting distance array of size n * k
 * @param[in] k the number of nearest neighbors to return
 * @param[in] rowMajorIndex are the index arrays in row-major order?
 * @param[in] rowMajorQuery are the query arrays in row-major order?
 * @param[in] metric distance metric to use. Euclidean (L2) is used by
 * default
 * @param[in] metric_arg the value of `p` for Minkowski (l-p) distances. This
 * is ignored if the metric_type is not Minkowski.
 * @param[in] expanded should lp-based distances be returned in their expanded
 * form (e.g., without raising to the 1/p power).
 */
void brute_force_knn(cumlHandle &handle, std::vector<float *> &input,
std::vector<int> &sizes, int D, float *search_items, int n,
int64_t *res_I, float *res_D, int k,
bool rowMajorIndex = false, bool rowMajorQuery = false,
MetricType metric = MetricType::METRIC_L2,
float metric_arg = 2.0f, bool expanded = false);

void approx_knn_build_index(cumlHandle &handle, ML::knnIndex *index,
ML::knnIndexParam *params, int D,
ML::MetricType metric, float metricArg,
float *index_items, int n);

void approx_knn_search(ML::knnIndex *index, int n, const float *x, int k,
float *distances, int64_t *labels);
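
The two declarations above form a build-then-search pair. Below is a minimal usage sketch, assuming handle is an initialized cumlHandle and that d_index_items, d_queries, d_distances, and d_labels (hypothetical names) are pre-allocated device buffers of the appropriate sizes.

// Sketch under the assumptions stated above; not a verbatim example
// from this PR.
ML::IVFFlatParam params;
params.automated = false;
params.nlist = 64;
params.nprobe = 8;

ML::knnIndex index;
ML::approx_knn_build_index(handle, &index, &params, D,
                           ML::MetricType::METRIC_L2, 2.0f,
                           d_index_items, n_index_rows);

ML::approx_knn_search(&index, n_queries, d_queries, k,
                      d_distances, d_labels);
// knnIndex's destructor releases the FAISS index and its GPU resources.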

/**
* @brief Flat C++ API function to perform a knn classification using a
* given a vector of label arrays. This supports multilabel classification
13 changes: 13 additions & 0 deletions cpp/src/knn/knn.cu
@@ -49,6 +49,19 @@ void brute_force_knn(cumlHandle &handle, std::vector<float *> &input,
rowMajorQuery, nullptr, metric, metric_arg, expanded);
}

void approx_knn_build_index(cumlHandle &handle, ML::knnIndex *index,
ML::knnIndexParam *params, int D,
ML::MetricType metric, float metricArg,
float *index_items, int n) {
MLCommon::Selection::approx_knn_build_index(
index, params, D, metric, metricArg, index_items, n, handle.getStream());
}

void approx_knn_search(ML::knnIndex *index, int n, const float *x, int k,
float *distances, int64_t *labels) {
MLCommon::Selection::approx_knn_search(index, n, x, k, distances, labels);
}

void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices,
std::vector<int *> &y, size_t n_index_rows,
size_t n_query_rows, int k) {
214 changes: 189 additions & 25 deletions cpp/src_prims/selection/knn.cuh
@@ -24,6 +24,9 @@

#include <faiss/gpu/GpuDistance.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/utils/Heap.h>
@@ -40,6 +43,7 @@
#include <cuml/neighbors/knn.hpp>

#include <iostream>
#include <set>

namespace MLCommon {
namespace Selection {
@@ -186,32 +190,192 @@ inline faiss::MetricType build_faiss_metric(ML::MetricType metric) {
}
}

inline faiss::ScalarQuantizer::QuantizerType build_faiss_qtype(
ML::QuantizerType qtype) {
switch (qtype) {
case ML::QuantizerType::QT_8bit:
return faiss::ScalarQuantizer::QuantizerType::QT_8bit;
case ML::QuantizerType::QT_8bit_uniform:
return faiss::ScalarQuantizer::QuantizerType::QT_8bit_uniform;
case ML::QuantizerType::QT_4bit_uniform:
return faiss::ScalarQuantizer::QuantizerType::QT_4bit_uniform;
case ML::QuantizerType::QT_fp16:
return faiss::ScalarQuantizer::QuantizerType::QT_fp16;
case ML::QuantizerType::QT_8bit_direct:
return faiss::ScalarQuantizer::QuantizerType::QT_8bit_direct;
case ML::QuantizerType::QT_6bit:
return faiss::ScalarQuantizer::QuantizerType::QT_6bit;
default:
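// Note (added comment, not in the PR): QT_4bit has no explicit case here;
// ML::QuantizerType appears to mirror the declaration order of
// faiss::ScalarQuantizer::QuantizerType, so the direct cast below maps it.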
return (faiss::ScalarQuantizer::QuantizerType)qtype;
}
}

const std::set<int> allowedSubDimSize = {1, 2, 3, 4, 6, 8, 10,
12, 16, 20, 24, 28, 32};
const std::initializer_list<int> allowedSubquantizers = {32, 28, 24, 20, 16, 12,
8, 4, 3, 2, 1};

template <typename IntType = int>
void approx_knn_ivfflat_build_index(ML::knnIndex *index, ML::IVFParam *params,
IntType D, ML::MetricType metric,
IntType n) {
if (params->automated) {
params->nlist = 8;
params->nprobe = params->nlist * 0.3;
}

faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = index->device;
faiss::MetricType faiss_metric = build_faiss_metric(metric);
faiss::gpu::GpuIndexIVFFlat *faiss_index = new faiss::gpu::GpuIndexIVFFlat(
index->gpu_res, D, params->nlist, faiss_metric, config);
faiss_index->setNumProbes(params->nprobe);
index->index = faiss_index;
}

template <typename IntType = int>
void approx_knn_ivfpq_build_index(ML::knnIndex *index, ML::IVFPQParam *params,
IntType D, ML::MetricType metric, IntType n) {
if (params->automated) {
params->M = 0;
params->n_bits = 0;
params->nlist = 8;
params->nprobe = params->nlist * 0.3;

for (int n_subq : allowedSubquantizers) {
if (D % n_subq == 0 &&
allowedSubDimSize.find(D / n_subq) != allowedSubDimSize.end()) {
params->usePrecomputedTables = false;
params->M = n_subq;
break;
}
}

if (params->M == 0) {
for (int n_subq : allowedSubquantizers) {
if (D % n_subq == 0) {
params->usePrecomputedTables = true;
params->M = n_subq;
break;
}
}
}

for (size_t i = 8; i > 0; --i) {
size_t min_train_points = std::pow(2, i) * 39;
if (n >= min_train_points) {
params->n_bits = i;
break;
}
}
}

faiss::gpu::GpuIndexIVFPQConfig config;
config.device = index->device;
config.usePrecomputedTables = params->usePrecomputedTables;
faiss::MetricType faiss_metric = build_faiss_metric(metric);
faiss::gpu::GpuIndexIVFPQ *faiss_index =
new faiss::gpu::GpuIndexIVFPQ(index->gpu_res, D, params->nlist, params->M,
params->n_bits, faiss_metric, config);
faiss_index->setNumProbes(params->nprobe);
index->index = faiss_index;
}
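
To see what the automated branch above produces for a concrete shape, here is a small self-contained replay of the same heuristic (not part of the PR; the problem shape D = 100, n = 10000 is chosen arbitrarily for illustration):

#include <cmath>
#include <cstdio>
#include <set>

// Stand-alone replay of the automated IVFPQ parameter heuristic above.
int main() {
  const std::set<int> allowedSubDimSize = {1,  2,  3,  4,  6,  8, 10,
                                           12, 16, 20, 24, 28, 32};
  const int allowedSubquantizers[] = {32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1};

  int D = 100, n = 10000;  // illustrative problem shape
  int M = 0, n_bits = 0;
  bool usePrecomputedTables = false;

  // First pass: prefer a subquantizer count whose sub-dimension (D / M)
  // is in the supported set.
  for (int n_subq : allowedSubquantizers) {
    if (D % n_subq == 0 && allowedSubDimSize.count(D / n_subq)) {
      usePrecomputedTables = false;
      M = n_subq;
      break;
    }
  }
  // Fallback: accept any divisor of D and enable precomputed tables.
  if (M == 0) {
    for (int n_subq : allowedSubquantizers) {
      if (D % n_subq == 0) {
        usePrecomputedTables = true;
        M = n_subq;
        break;
      }
    }
  }
  // Largest bit width whose training requirement (39 * 2^bits points)
  // the dataset satisfies.
  for (int i = 8; i > 0; --i) {
    if (n >= std::pow(2, i) * 39) {
      n_bits = i;
      break;
    }
  }

  // For D = 100 no divisor gives a supported sub-dimension, so the
  // fallback selects M = 20 with precomputed tables; n = 10000 >= 9984
  // gives n_bits = 8.
  std::printf("M=%d n_bits=%d precomputed=%d\n", M, n_bits,
              (int)usePrecomputedTables);
  return 0;
}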

template <typename IntType = int>
void approx_knn_ivfsq_build_index(ML::knnIndex *index, ML::IVFSQParam *params,
IntType D, ML::MetricType metric, IntType n) {
if (params->automated) {
params->nlist = 8;
params->nprobe = params->nlist * 0.3;
params->qtype = ML::QuantizerType::QT_8bit;
params->encodeResidual = true;
}

faiss::gpu::GpuIndexIVFScalarQuantizerConfig config;
config.device = index->device;
faiss::MetricType faiss_metric = build_faiss_metric(metric);
faiss::ScalarQuantizer::QuantizerType faiss_qtype =
build_faiss_qtype(params->qtype);
faiss::gpu::GpuIndexIVFScalarQuantizer *faiss_index =
new faiss::gpu::GpuIndexIVFScalarQuantizer(index->gpu_res, D, params->nlist,
faiss_qtype, faiss_metric,
params->encodeResidual);
faiss_index->setNumProbes(params->nprobe);
index->index = faiss_index;
}

template <typename IntType = int>
void approx_knn_build_index(ML::knnIndex *index, ML::knnIndexParam *params,
IntType D, ML::MetricType metric, float metricArg,
float *index_items, IntType n,
cudaStream_t userStream) {
int device;
CUDA_CHECK(cudaGetDevice(&device));

faiss::gpu::StandardGpuResources *gpu_res =
new faiss::gpu::StandardGpuResources();
gpu_res->noTempMemory();
gpu_res->setCudaMallocWarning(false);
gpu_res->setDefaultStream(device, userStream);
index->gpu_res = gpu_res;
index->device = device;
index->index = nullptr;

if (dynamic_cast<ML::IVFFlatParam *>(params)) {
ML::IVFFlatParam *IVFFlat_param = dynamic_cast<ML::IVFFlatParam *>(params);
approx_knn_ivfflat_build_index(index, IVFFlat_param, D, metric, n);
std::vector<float> h_index_items(n * D);
updateHost(h_index_items.data(), index_items, h_index_items.size(),
userStream);
index->index->train(n, h_index_items.data());
index->index->add(n, h_index_items.data());
return;
} else if (dynamic_cast<ML::IVFPQParam *>(params)) {
ML::IVFPQParam *IVFPQ_param = dynamic_cast<ML::IVFPQParam *>(params);
approx_knn_ivfpq_build_index(index, IVFPQ_param, D, metric, n);
} else if (dynamic_cast<ML::IVFSQParam *>(params)) {
ML::IVFSQParam *IVFSQ_param = dynamic_cast<ML::IVFSQParam *>(params);
approx_knn_ivfsq_build_index(index, IVFSQ_param, D, metric, n);
} else {
ASSERT(index->index, "KNN index could not be initialized");
}

index->index->train(n, index_items);
index->index->add(n, index_items);
}

template <typename IntType = int>
void approx_knn_search(ML::knnIndex *index, IntType n, const float *x,
IntType k, float *distances, int64_t *labels) {
index->index->search(n, x, k, distances, labels);
}

/**
* Search the kNN for the k-nearest neighbors of a set of query vectors
* @param[in] input vector of device memory array pointers to search
* @param[in] sizes vector of memory sizes for each device array pointer in input
* @param[in] D number of cols in input and search_items
* @param[in] search_items set of vectors to query for neighbors
* @param[in] n number of items in search_items
* @param[out] res_I pointer to device memory for returning k nearest indices
* @param[out] res_D pointer to device memory for returning k nearest distances
* @param[in] k number of neighbors to query
* @param[in] allocator the device memory allocator to use for temporary scratch memory
* @param[in] userStream the main cuda stream to use
* @param[in] internalStreams optional when n_params > 0, the index partitions can be
* queried in parallel using these streams. Note that n_int_streams also
* has to be > 0 for these to be used and their cardinality does not need
* to correspond to n_parts.
* @param[in] n_int_streams size of internalStreams. When this is <= 0, only the
* user stream will be used.
* @param[in] rowMajorIndex are the index arrays in row-major layout?
* @param[in] rowMajorQuery are the query arrays in row-major layout?
* @param[in] translations translation ids for indices when index rows represent
* non-contiguous partitions
* @param[in] metric corresponds to the FAISS::metricType enum (default is euclidean)
* @param[in] metricArg metric argument to use. Corresponds to the p arg for lp norm
* @param[in] expanded_form whether or not lp variants should be reduced w/ lp-root
*/
template <typename IntType = int>
void brute_force_knn(std::vector<float *> &input, std::vector<int> &sizes,
IntType D, float *search_items, IntType n, int64_t *res_I,
9 changes: 7 additions & 2 deletions python/cuml/neighbors/__init__.py
@@ -21,12 +21,17 @@
from cuml.neighbors.kneighbors_classifier import KNeighborsClassifier
from cuml.neighbors.kneighbors_regressor import KNeighborsRegressor

VALID_METRICS = {"brute": set([
VALID_METRICS = {
"brute": set([
"l2", "euclidean",
"l1", "cityblock", "manhattan", "taxicab",
"braycurtis", "canberra",
"minkowski", "lp",
"chebyshev", "linf",
"jensenshannon",
"cosine", "correlation"
]),
"ivfflat": set(["l2", "euclidean"]),
"ivfpq": set(["l2", "euclidean"]),
"ivfsq": set(["l2", "euclidean"])
}