pymc-devs · AlexAndorra · Jun 10, 2024 · Jun 1, 2024 · Jun 1, 2024 · Jun 1, 2024
diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py
@@ -17,7 +17,6 @@
 
 from collections.abc import Sequence
 from types import ModuleType
-from typing import NamedTuple
 
 import numpy as np
 import pytensor.tensor as pt
@@ -31,11 +30,14 @@
 TensorLike = np.ndarray | pt.TensorVariable
 
 
-def set_boundary(Xs: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
-    """Set the boundary using the `Xs` centered around 0 and `c`. `c` is usually a scalar
-    multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
+def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
+    """Set the boundary using `X` and `c`.  `X` can be centered around zero but doesn't have to be,
+    and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per
+    dimension or column of `X`.
     """
-    S = pt.max(pt.abs(Xs), axis=0)  # important: the Xs should be centered around 0
+    # compute radius. Works whether X is 0-centered or not
+    S = (pt.max(X, axis=0) - pt.min(X, axis=0)) / 2.0
+
     L = (c * S).eval()  # eval() makes sure L is not changed with out-of-sample preds
     return L
 
@@ -89,44 +91,42 @@ def calc_basis_periodic(
     return phi_cos, phi_sin
 
 
-class HSGPParams(NamedTuple):
-    m: int
-    c: float
-    S: float
-
-
 def approx_hsgp_hyperparams(
-    x: np.ndarray, lengthscale_range: list[float], cov_func: str
-) -> HSGPParams:
+    x_range: list[float], lengthscale_range: list[float], cov_func: str
+) -> tuple[int, float]:
     """Utility function that uses heuristics to recommend minimum `m` and `c` values,
     based on recommendations from Ruitort-Mayol et. al.
 
     In practice, you need to choose `c` large enough to handle the largest lengthscales,
-    and `m` large enough to accommodate the smallest lengthscales.
+    and `m` large enough to accommodate the smallest lengthscales.  Use your prior on the
+    lengthscale as guidance for setting the prior range.  For example, if you believe
+    that 95% of the prior mass of the lengthscale is between 1 and 5, set the
+    `lengthscale_range` to be [1, 5], or maybe a touch wider.
+
+    Also, be sure to pass in an `x_range` that is exemplary of the domain not just of your
+    training data, but also where you intend to make predictions.  For instance, if your
+    training x values are from [0, 10], and you intend to predict from [7, 15], the narrowest
+    `x_range` you should pass in would be `x_range = [0, 15]`.
 
     NB: These recommendations are based on a one-dimensional GP.
 
     Parameters
     ----------
-    x : np.ndarray
-        The input variable on which the GP is going to be evaluated.
-        Careful: should be the X values you want to predict over, not *only* the training X.
+    x_range : list[float]
+        The range of the x values you intend to both train and predict over.  Should be a list with
+        two elements, [x_min, x_max].
     lengthscale_range : List[float]
-        The range of the lengthscales. Should be a list with two elements [lengthscale_min, lengthscale_max].
+        The range of the lengthscales. Should be a list with two elements, [lengthscale_min, lengthscale_max].
     cov_func : str
         The covariance function to use. Supported options are "expquad", "matern52", and "matern32".
 
     Returns
     -------
-    HSGPParams
-        A named tuple containing the recommended values for `m`, `c`, and `S`.
-        - `m` : int
-            Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
-        - `c` : float
-            Scaling factor such that L = c * S, where L is the boundary of the approximation.
-            Increasing it helps approximate larger lengthscales, but may require increasing m.
-        - `S` : float
-            The value of `S`, which is half the range of `x`.
+    - `m` : int
+        Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
+    - `c` : float
+        Scaling factor such that L = c * S, where L is the boundary of the approximation.
+        Increasing it helps approximate larger lengthscales, but may require increasing m.
 
     Raises
     ------
@@ -139,11 +139,12 @@ def approx_hsgp_hyperparams(
     Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
     """
     if lengthscale_range[0] >= lengthscale_range[1]:
-        raise ValueError("One of the boundaries out of order")
+        raise ValueError("One of the `lengthscale_range` boundaries is out of order.")
 
-    X_center = (np.max(x, axis=0) - np.min(x, axis=0)) / 2
-    Xs = x - X_center
-    S = np.max(np.abs(Xs), axis=0)
+    if x_range[0] >= x_range[1]:
+        raise ValueError("One of the `x_range` boundaries is out of order.")
+
+    S = (x_range[1] - x_range[0]) / 2.0
 
     if cov_func.lower() == "expquad":
         a1, a2 = 3.2, 1.75
@@ -162,7 +163,7 @@ def approx_hsgp_hyperparams(
     c = max(a1 * (lengthscale_range[1] / S), 1.2)
     m = int(a2 * c / (lengthscale_range[0] / S))
 
-    return HSGPParams(m=m, c=c, S=S)
+    return m, c
 
 
 class HSGP(Base):
@@ -286,6 +287,7 @@ def __init__(
 
         if parametrization is not None:
             parametrization = parametrization.lower().replace("-", "").replace("_", "")
+
         if parametrization not in ["centered", "noncentered"]:
             raise ValueError("`parametrization` must be either 'centered' or 'noncentered'.")
 
@@ -321,7 +323,7 @@ def L(self) -> pt.TensorVariable:
     def L(self, value: TensorLike):
         self._L = pt.as_tensor_variable(value)
 
-    def prior_linearized(self, Xs: TensorLike):
+    def prior_linearized(self, X: TensorLike):
         """Linearized version of the HSGP.  Returns the Laplace eigenfunctions and the square root
         of the power spectral density needed to create the GP.
 
@@ -334,7 +336,7 @@ def prior_linearized(self, Xs: TensorLike):
 
         Parameters
         ----------
-        Xs: array-like
+        X: array-like
             Function input values.
 
         Returns
@@ -362,9 +364,9 @@ def prior_linearized(self, Xs: TensorLike):
                 # L = [10] means the approximation is valid from Xs = [-10, 10]
                 gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func)
 
+                # Set X as Data so it can be mutated later, and then pass it to the GP
                 X = pm.Data("X", X)
-                # Pass X to the GP
-                phi, sqrt_psd = gp.prior_linearized(Xs=X)
+                phi, sqrt_psd = gp.prior_linearized(X=X)
 
                 # Specify standard normal prior in the coefficients, the number of which
                 # is given by the number of basis vectors, saved in `n_basis_vectors`.
@@ -394,8 +396,8 @@ def prior_linearized(self, Xs: TensorLike):
         # Important: fix the computation of the midpoint of X.
         # If X is mutated later, the training midpoint will be subtracted, not the testing one.
         if self._X_center is None:
-            self._X_center = (pt.max(Xs, axis=0) - pt.min(Xs, axis=0)).eval() / 2
-        Xs = Xs - self._X_center  # center for accurate computation
+            self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
+        Xs = X - self._X_center  # center for accurate computation
 
         # Index Xs using input_dim and active_dims of covariance function
         Xs, _ = self.cov_func._slice(Xs)
@@ -588,10 +590,11 @@ def __init__(
 
         self._m = m
         self.scale = scale
+        self._X_center = None
 
         super().__init__(mean_func=mean_func, cov_func=cov_func)
 
-    def prior_linearized(self, Xs: TensorLike):
+    def prior_linearized(self, X: TensorLike):
         """Linearized version of the approximation. Returns the cosine and sine bases and coefficients
         of the expansion needed to create the GP.
 
@@ -606,8 +609,8 @@ def prior_linearized(self, Xs: TensorLike):
 
         Parameters
         ----------
-        Xs: array-like
-            Function input values.  Assumes they have been mean subtracted or centered at zero.
+        X: array-like
+            Function input values.
 
         Returns
         -------
@@ -631,15 +634,9 @@ def prior_linearized(self, Xs: TensorLike):
                 # m=200 means 200 basis vectors
                 gp = pm.gp.HSGPPeriodic(m=200, scale=scale, cov_func=cov_func)
 
-                # Order is important.  First calculate the mean, then make X a shared variable,
-                # then subtract the mean.  When X is mutated later, the correct mean will be
-                # subtracted.
-                X_mean = np.mean(X, axis=0)
-                X = pm.MutableData("X", X)
-                Xs = X - X_mean
-
-                # Pass the zero-subtracted Xs in to the GP
-                (phi_cos, phi_sin), psd = gp.prior_linearized(Xs=Xs)
+                # Set X as Data so it can be mutated later, and then pass it to the GP
+                X = pm.Data("X", X)
+                (phi_cos, phi_sin), psd = gp.prior_linearized(X=X)
 
                 # Specify standard normal prior in the coefficients.  The number of which
                 # is twice the number of basis vectors minus one.
@@ -666,6 +663,13 @@ def prior_linearized(self, Xs: TensorLike):
             with model:
                 ppc = pm.sample_posterior_predictive(idata, var_names=["f"])
         """
+        # Important: fix the computation of the midpoint of X.
+        # If X is mutated later, the training midpoint will be subtracted, not the testing one.
+        if self._X_center is None:
+            self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
+        Xs = X - self._X_center  # center for accurate computation
+
+        # Index Xs using input_dim and active_dims of covariance function
         Xs, _ = self.cov_func._slice(Xs)
 
         phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m, tl=pt)
@@ -688,9 +692,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ign
         dims: None
             Dimension name for the GP random variable.
         """
-        self._X_mean = pt.mean(X, axis=0)
-
-        (phi_cos, phi_sin), psd = self.prior_linearized(X - self._X_mean)
+        (phi_cos, phi_sin), psd = self.prior_linearized(X)
 
         m = self._m
         self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1))
@@ -707,7 +709,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ign
 
     def _build_conditional(self, Xnew):
         try:
-            beta, X_mean = self._beta, self._X_mean
+            beta, X_center = self._beta, self._X_center
 
         except AttributeError:
             raise ValueError(
@@ -716,7 +718,9 @@ def _build_conditional(self, Xnew):
 
         Xnew, _ = self.cov_func._slice(Xnew)
 
-        phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m, tl=pt)
+        phi_cos, phi_sin = calc_basis_periodic(
+            Xnew - X_center, self.cov_func.period, self._m, tl=pt
+        )
         m = self._m
         J = pt.arange(0, m, 1)
         # rescale basis coefficients by the sqrt variance term

diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
@@ -129,7 +129,7 @@ def test_mean_invariance(self):
             _ = pm.Data("X", X)
             cov_func = pm.gp.cov.ExpQuad(1, ls=3)
             gp = pm.gp.HSGP(m=[20], L=[10], cov_func=cov_func)
-            _ = gp.prior_linearized(Xs=X)
+            _ = gp.prior_linearized(X=X)
 
         x_new = np.linspace(-10, 20, 100)[:, None]
         with model: