cornellius-gp · alexpeters1208 · Nov 1, 2022 · Nov 1, 2022 · Nov 3, 2022 · Nov 4, 2022
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -259,7 +259,7 @@ def _process(annotation, config):
     # For any linear_operator class, the format will be e.g. "~linear_operator.operators.TriangularLinearOperator"
     # For any internal class, the format will be e.g. "~gpytorch.kernels.RBFKernel"
     elif hasattr(annotation, "__name__"):
-        module = annotation.__module__ + "."
+        module = str(annotation.__module__) + "."
         if module.split(".")[0] == "linear_operator":
             if annotation.__name__.endswith("LinearOperator"):
                 module = "~linear_operator."

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -33,6 +33,7 @@ GPyTorch's documentation
    means
    marginal_log_likelihoods
    metrics
+   nearest_neighbors
    constraints
    distributions
    priors

diff --git a/docs/source/nearest_neighbors.rst b/docs/source/nearest_neighbors.rst
@@ -0,0 +1,67 @@
+.. role:: hidden
+    :class: hidden-section
+
+gpytorch.nearest_neighbors
+===================================
+
+These modules provide a set of interfaces for partitioning datasets and establishing
+neighborhood structures between partitions. This kind of partitioning is required for
+nearest-neighbor-style Gaussian Process models, and we ensure behind the scenes that nearest-neighbor models
+based on these partitions still form valid joint density functions.
+
+.. automodule:: gpytorch.nearest_neighbors
+.. currentmodule:: gpytorch.nearest_neighbors
+
+
+Indexes
+-----------------------------
+
+Indexes are the interfaces used to partition datasets with clustering algorithms, measure the distance
+between partitions with a distance metric to establish a neighboring structure, and order
+the data with ordering strategies.
+
+:hidden:`KMeansIndex`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: KMeansIndex
+   :members:
+
+
+:hidden:`VoronoiIndex`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: VoronoiIndex
+   :members:
+
+
+Distance Metrics
+-----------------------------
+
+Distance metrics are used to define distances between partitions of data. Each index defines the
+points that represent each block, and the distance between blocks is defined as the distance between
+these representatives under the supplied distance metric. The ``DistanceMetrics`` class includes methods
+for the Euclidean and Manhattan distance metrics; custom distance metrics must return
+functions that take in vectors of observations and return the distance matrix for those observations.
+
+:hidden:`DistanceMetrics`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: DistanceMetrics
+   :members:
+
+
+Ordering Strategies
+-----------------------------
+
+Because nearest-neighbor approximations depend on the ordering of the data they're trained on, we need a way
+to order the dataset by different metrics to find the best ordering strategy for a given problem.
+The OrderingStrategies class includes methods for ordering the data by a given coordinate or by an
+:math:`L_p` norm. Custom ordering strategies can be implemented here and must return a function that
+takes in a vector of observations and returns a vector of integers indicating the index of each observation
+under the new ordering.
+
+:hidden:`OrderingStrategies`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: OrderingStrategies
+   :members:
diff --git a/examples/04_Variational_and_Approximate_GPs/Getting_Started_with_Nearest_Neighbors.ipynb b/examples/04_Variational_and_Approximate_GPs/Getting_Started_with_Nearest_Neighbors.ipynb
diff --git a/examples/04_Variational_and_Approximate_GPs/index.rst b/examples/04_Variational_and_Approximate_GPs/index.rst
@@ -25,6 +25,7 @@ Here we provide some examples which highlight some of the common use cases:
 - **Variational inference with natural gradient descent** (for faster/better optimization): see the `ngd example`_.
 - **Variational inference with contour integral quadrature** (for large numbers of inducing points): see the `ciq example`_.
 - **Variational inference with nearest neighbor approximation** (for large numbers of inducing points): see the `vnngp example`_.
+- **Customizing nearest-neighbor approximations** as an alternative to variational inference: see the `nearest neighbors introduction notebook`_.
 - **Variational distribution options** for different scalability/expressiveness: see the `strategy/distribution comparison`_.
 - **Alternative optimization objectives** for the GP's predictive distribution: see the `approximate GP objective functions notebook`_.
   This example compares and contrasts the variational ELBO with the predictive log likelihood of Jankowiak et al., 2020.
@@ -46,6 +47,7 @@ Here we provide some examples which highlight some of the common use cases:
    PolyaGamma_Binary_Classification.ipynb
    SVGP_Multitask_GP_Regression.ipynb
    GP_Regression_with_Uncertain_Inputs.ipynb
+   Getting_Started_with_Nearest_Neighbors.ipynb
 
 .. _strategy/distribution comparison:
   ./Modifying_the_variational_strategy_and_distribution.ipynb
@@ -76,3 +78,6 @@ Here we provide some examples which highlight some of the common use cases:
 
 .. _GPs with uncertain inputs example:
   ./GP_Regression_with_Uncertain_Inputs.ipynb
+
+.. _nearest neighbors introduction notebook:
+  ./Getting_Started_with_Nearest_Neighbors.ipynb
diff --git a/gpytorch/nearest_neighbors/__init__.py b/gpytorch/nearest_neighbors/__init__.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+from .distance_metrics import *
+from .ordering_strategies import *
+
+from ._index import BaseIndex
+from .k_means_index import KMeansIndex
+from .voronoi_index import VoronoiIndex
+
+# Names exported by ``from gpytorch.nearest_neighbors import *``.
+# NOTE: ``compute_mean_covar`` (defined later in this module) is not listed here.
+__all__ = [
+    "DistanceMetrics",
+    "OrderingStrategies",
+    "BaseIndex",
+    "KMeansIndex",
+    "VoronoiIndex",
+]
+
+
+# TODO: Decide where this function should live.
+# It uses an index to compute the block means and covariances for a Vecchia-style GP. Until there is a more
+# concrete nearest_neighbors module, its final location is undecided.
+def compute_mean_covar(index, x1, x2, y, mean_module, covar_module, training):
+    """
+    Compute one mean and one covariance per block of a nearest-neighbor index.
+
+    For a block ``b`` with neighbor set ``n`` the conditional quantities are
+
+    .. math::
+
+        \\mu_{b|n} = m(x_b) + C_{bn} C_{nn}^{-1} (y_n - m(x_n)), \\qquad
+        F_b = C_{bb} - C_{bn} C_{nn}^{-1} C_{nb}
+
+    where :math:`C_{nn}^{-1}` is formed from the inverse of the Cholesky factor of :math:`C_{nn}`.
+
+    :param index: Object exposing ``blocks`` and ``neighbors`` (read when ``training`` is truthy) and
+        ``test_blocks`` and ``test_neighbors`` (read otherwise). Each is indexed by block number and the
+        resulting entries are used to index ``x1``, ``x2`` and ``y``.
+    :param x1: Inputs indexed by block entries (and, in training, by neighbor entries).
+    :param x2: Inputs indexed by block and neighbor entries.
+        NOTE(review): presumably ``x1`` and ``x2`` are the same training inputs when ``training`` is truthy,
+        and ``x1`` are test inputs / ``x2`` training inputs otherwise - confirm against the caller.
+    :param y: Observations, indexed by neighbor entries.
+    :param mean_module: Callable applied to a set of inputs to produce their prior mean.
+    :param covar_module: Callable applied to two sets of inputs to produce their covariance. The result must
+        support ``@``, ``.t()``, subtraction and ``.cholesky().inverse()``.
+    :param training: Selects the training branch (truthy) or the evaluation branch (falsy).
+    :return: Tuple ``(mean_list, cov_list)`` holding one entry per processed block.
+    """
+
+    # extract relevant info from index
+    n_blocks = len(index.blocks)
+    # NOTE(review): the neighbor count is read from the last block only; presumably every block after the
+    # first has the same number of neighbors (or all have none) - confirm.
+    n_neighbors = len(index.neighbors[-1])
+
+    # create empty lists to hold block means and covariances
+    mean_list = []
+    cov_list = []
+
+    if training:
+        # the first block is never conditioned on neighbors: use its prior mean and covariance directly
+        # append mean function applied to first block in first spot
+        mean_list.append(mean_module(x1[index.blocks[0]]))
+        # append within covariance block to first spot
+        cov_list.append(covar_module(x1[index.blocks[0]], x2[index.blocks[0]]))
+
+        if n_neighbors == 0:
+            # if no neighbors, all blocks are independent, so simply evaluate mean and covariance for each block
+            for i in range(1, n_blocks):
+                mean_list.append(mean_module(x1[index.blocks[i]]))
+                cov_list.append(covar_module(x1[index.blocks[i]], x2[index.blocks[i]]))
+
+        else:
+            for i in range(1, n_blocks):
+                # these calculations come from bottom of P7, Quiroz et al, 2021
+                # c_within: block-block, c_between: block-neighbors, c_neighbors: neighbors-neighbors
+                c_within = covar_module(x1[index.blocks[i]], x2[index.blocks[i]])
+                c_between = covar_module(x1[index.blocks[i]], x2[index.neighbors[i]])
+                c_neighbors = covar_module(x1[index.neighbors[i]], x2[index.neighbors[i]])
+
+                # use cholesky decomposition to compute inverse, may be numerically unstable with large n_neighbors
+                # l_inv.t() @ l_inv equals the inverse of c_neighbors (assumes a lower-triangular factor - confirm)
+                l_inv = c_neighbors.cholesky().inverse()
+                # compute mean
+                b = c_between @ l_inv.t() @ l_inv
+                mean = mean_module(x1[index.blocks[i]]) + b @ (
+                    y[index.neighbors[i]] - mean_module(x2[index.neighbors[i]])
+                )
+                # compute covariance
+                f = c_within - (c_between @ l_inv.t() @ l_inv @ c_between.t())
+
+                mean_list.append(mean)
+                cov_list.append(f)
+
+    else:
+        # NOTE(review): the loop bound is len(index.blocks) while the body indexes index.test_blocks and
+        # index.test_neighbors; presumably these have the same length - confirm.
+        for i in range(0, len(index.blocks)):
+            # here the block inputs come from x1 only and the neighbor inputs from x2 only
+            c_within = covar_module(x1[index.test_blocks[i]], x1[index.test_blocks[i]])
+            c_between = covar_module(x1[index.test_blocks[i]], x2[index.test_neighbors[i]])
+            c_neighbors = covar_module(x2[index.test_neighbors[i]], x2[index.test_neighbors[i]])
+
+            # use cholesky decomposition to compute needed terms, may be numerically unstable with large n_neighbors
+            l_inv = c_neighbors.cholesky().inverse()
+            # compute mean
+            b = c_between @ l_inv.t() @ l_inv
+            mean = mean_module(x1[index.test_blocks[i]]) + b @ (
+                y[index.test_neighbors[i]] - mean_module(x2[index.test_neighbors[i]])
+            )
+            # compute covariance
+            f = c_within - (c_between @ l_inv.t() @ l_inv @ c_between.t())
+
+            mean_list.append(mean)
+            cov_list.append(f)
+
+    return mean_list, cov_list