Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HSGP misc fixes #7342

Merged
merged 7 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 59 additions & 55 deletions pymc/gp/hsgp_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from collections.abc import Sequence
from types import ModuleType
from typing import NamedTuple

import numpy as np
import pytensor.tensor as pt
Expand All @@ -31,11 +30,14 @@
TensorLike = np.ndarray | pt.TensorVariable


def set_boundary(Xs: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
"""Set the boundary using the `Xs` centered around 0 and `c`. `c` is usually a scalar
multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
"""Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be,
and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per
dimension or column of `X`.
"""
S = pt.max(pt.abs(Xs), axis=0) # important: the Xs should be centered around 0
# compute radius. Works whether X is 0-centered or not
S = (pt.max(X, axis=0) - pt.min(X, axis=0)) / 2.0

L = (c * S).eval() # eval() makes sure L is not changed with out-of-sample preds
return L

Expand Down Expand Up @@ -89,44 +91,42 @@ def calc_basis_periodic(
return phi_cos, phi_sin


class HSGPParams(NamedTuple):
m: int
c: float
S: float


def approx_hsgp_hyperparams(
x: np.ndarray, lengthscale_range: list[float], cov_func: str
) -> HSGPParams:
x_range: list[float], lengthscale_range: list[float], cov_func: str
) -> tuple[int, float]:
"""Utility function that uses heuristics to recommend minimum `m` and `c` values,
based on recommendations from Ruitort-Mayol et. al.

In practice, you need to choose `c` large enough to handle the largest lengthscales,
and `m` large enough to accommodate the smallest lengthscales.
and `m` large enough to accommodate the smallest lengthscales. Use your prior on the
lengthscale as guidance for setting the prior range. For example, if you believe
that 95% of the prior mass of the lengthscale is between 1 and 5, set the
`lengthscale_range` to be [1, 5], or maybe a touch wider.

Also, be sure to pass in an `x_range` that is exemplary of the domain not just of your
training data, but also where you intend to make predictions. For instance, if your
training x values are from [0, 10], and you intend to predict from [7, 15], the narrowest
`x_range` you should pass in would be `x_range = [0, 15]`.

NB: These recommendations are based on a one-dimensional GP.

Parameters
----------
x : np.ndarray
The input variable on which the GP is going to be evaluated.
Careful: should be the X values you want to predict over, not *only* the training X.
x_range : list[float]
The range of the x values you intend to both train and predict over. Should be a list with
two elements, [x_min, x_max].
lengthscale_range : List[float]
The range of the lengthscales. Should be a list with two elements [lengthscale_min, lengthscale_max].
The range of the lengthscales. Should be a list with two elements, [lengthscale_min, lengthscale_max].
cov_func : str
The covariance function to use. Supported options are "expquad", "matern52", and "matern32".

Returns
-------
HSGPParams
A named tuple containing the recommended values for `m`, `c`, and `S`.
- `m` : int
Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
- `c` : float
Scaling factor such that L = c * S, where L is the boundary of the approximation.
Increasing it helps approximate larger lengthscales, but may require increasing m.
- `S` : float
The value of `S`, which is half the range of `x`.
- `m` : int
Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
- `c` : float
Scaling factor such that L = c * S, where L is the boundary of the approximation.
Increasing it helps approximate larger lengthscales, but may require increasing m.

Raises
------
Expand All @@ -139,11 +139,12 @@ def approx_hsgp_hyperparams(
Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
"""
if lengthscale_range[0] >= lengthscale_range[1]:
raise ValueError("One of the boundaries out of order")
raise ValueError("One of the `lengthscale_range` boundaries is out of order.")

X_center = (np.max(x, axis=0) - np.min(x, axis=0)) / 2
Xs = x - X_center
S = np.max(np.abs(Xs), axis=0)
if x_range[0] >= x_range[1]:
raise ValueError("One of the `x_range` boundaries is out of order.")

S = (x_range[1] - x_range[0]) / 2.0

if cov_func.lower() == "expquad":
a1, a2 = 3.2, 1.75
Expand All @@ -162,7 +163,7 @@ def approx_hsgp_hyperparams(
c = max(a1 * (lengthscale_range[1] / S), 1.2)
m = int(a2 * c / (lengthscale_range[0] / S))

return HSGPParams(m=m, c=c, S=S)
return m, c


class HSGP(Base):
Expand Down Expand Up @@ -286,6 +287,7 @@ def __init__(

if parametrization is not None:
parametrization = parametrization.lower().replace("-", "").replace("_", "")

if parametrization not in ["centered", "noncentered"]:
raise ValueError("`parametrization` must be either 'centered' or 'noncentered'.")

Expand Down Expand Up @@ -321,7 +323,7 @@ def L(self) -> pt.TensorVariable:
def L(self, value: TensorLike):
self._L = pt.as_tensor_variable(value)

def prior_linearized(self, Xs: TensorLike):
def prior_linearized(self, X: TensorLike):
"""Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root
of the power spectral density needed to create the GP.

Expand All @@ -334,7 +336,7 @@ def prior_linearized(self, Xs: TensorLike):

Parameters
----------
Xs: array-like
X: array-like
Function input values.

Returns
Expand Down Expand Up @@ -362,9 +364,9 @@ def prior_linearized(self, Xs: TensorLike):
# L = [10] means the approximation is valid from Xs = [-10, 10]
gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func)

# Set X as Data so it can be mutated later, and then pass it to the GP
X = pm.Data("X", X)
# Pass X to the GP
phi, sqrt_psd = gp.prior_linearized(Xs=X)
phi, sqrt_psd = gp.prior_linearized(X=X)

# Specify standard normal prior in the coefficients, the number of which
# is given by the number of basis vectors, saved in `n_basis_vectors`.
Expand Down Expand Up @@ -394,8 +396,8 @@ def prior_linearized(self, Xs: TensorLike):
# Important: fix the computation of the midpoint of X.
# If X is mutated later, the training midpoint will be subtracted, not the testing one.
if self._X_center is None:
self._X_center = (pt.max(Xs, axis=0) - pt.min(Xs, axis=0)).eval() / 2
Xs = Xs - self._X_center # center for accurate computation
self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
Xs = X - self._X_center # center for accurate computation
Comment on lines +399 to +400
Copy link
Contributor

@juanitorduz juanitorduz Jun 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small suggestion: Could a unit test be added to check this so that we are sure future changes won't break the signs? We could just wrap this little logic into a small axillary function so that we can easily test it.


# Index Xs using input_dim and active_dims of covariance function
Xs, _ = self.cov_func._slice(Xs)
Expand Down Expand Up @@ -588,10 +590,11 @@ def __init__(

self._m = m
self.scale = scale
self._X_center = None

super().__init__(mean_func=mean_func, cov_func=cov_func)

def prior_linearized(self, Xs: TensorLike):
def prior_linearized(self, X: TensorLike):
"""Linearized version of the approximation. Returns the cosine and sine bases and coefficients
of the expansion needed to create the GP.

Expand All @@ -606,8 +609,8 @@ def prior_linearized(self, Xs: TensorLike):

Parameters
----------
Xs: array-like
Function input values. Assumes they have been mean subtracted or centered at zero.
X: array-like
Function input values.

Returns
-------
Expand All @@ -631,15 +634,9 @@ def prior_linearized(self, Xs: TensorLike):
# m=200 means 200 basis vectors
gp = pm.gp.HSGPPeriodic(m=200, scale=scale, cov_func=cov_func)

# Order is important. First calculate the mean, then make X a shared variable,
# then subtract the mean. When X is mutated later, the correct mean will be
# subtracted.
X_mean = np.mean(X, axis=0)
X = pm.MutableData("X", X)
Xs = X - X_mean

# Pass the zero-subtracted Xs in to the GP
(phi_cos, phi_sin), psd = gp.prior_linearized(Xs=Xs)
# Set X as Data so it can be mutated later, and then pass it to the GP
X = pm.Data("X", X)
(phi_cos, phi_sin), psd = gp.prior_linearized(X=X)

# Specify standard normal prior in the coefficients. The number of which
# is twice the number of basis vectors minus one.
Expand All @@ -666,6 +663,13 @@ def prior_linearized(self, Xs: TensorLike):
with model:
ppc = pm.sample_posterior_predictive(idata, var_names=["f"])
"""
# Important: fix the computation of the midpoint of X.
# If X is mutated later, the training midpoint will be subtracted, not the testing one.
if self._X_center is None:
self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
Xs = X - self._X_center # center for accurate computation

# Index Xs using input_dim and active_dims of covariance function
Xs, _ = self.cov_func._slice(Xs)

phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m, tl=pt)
Expand All @@ -688,9 +692,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign
dims: None
Dimension name for the GP random variable.
"""
self._X_mean = pt.mean(X, axis=0)

(phi_cos, phi_sin), psd = self.prior_linearized(X - self._X_mean)
(phi_cos, phi_sin), psd = self.prior_linearized(X)

m = self._m
self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1))
Expand All @@ -707,7 +709,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign

def _build_conditional(self, Xnew):
try:
beta, X_mean = self._beta, self._X_mean
beta, X_center = self._beta, self._X_center

except AttributeError:
raise ValueError(
Expand All @@ -716,7 +718,9 @@ def _build_conditional(self, Xnew):

Xnew, _ = self.cov_func._slice(Xnew)

phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m, tl=pt)
phi_cos, phi_sin = calc_basis_periodic(
Xnew - X_center, self.cov_func.period, self._m, tl=pt
)
m = self._m
J = pt.arange(0, m, 1)
# rescale basis coefficients by the sqrt variance term
Expand Down
2 changes: 1 addition & 1 deletion tests/gp/test_hsgp_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_mean_invariance(self):
_ = pm.Data("X", X)
cov_func = pm.gp.cov.ExpQuad(1, ls=3)
gp = pm.gp.HSGP(m=[20], L=[10], cov_func=cov_func)
_ = gp.prior_linearized(Xs=X)
_ = gp.prior_linearized(X=X)

x_new = np.linspace(-10, 20, 100)[:, None]
with model:
Expand Down
Loading