diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index 1557ab567bc..1d767b53e8c 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -148,18 +148,37 @@ class Latent(Base):
     def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)):
         super().__init__(mean_func=mean_func, cov_func=cov_func)

-    def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs):
+    def _build_prior(
+        self, name, X, n_outputs=1, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs
+    ):
         mu = self.mean_func(X)
         cov = stabilize(self.cov_func(X), jitter)
         if reparameterize:
-            size = np.shape(X)[0]
-            v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs)
-            f = pm.Deterministic(name, mu + cholesky(cov).dot(v), dims=kwargs.get("dims", None))
+            if "dims" in kwargs:
+                v = pm.Normal(
+                    name + "_rotated_",
+                    mu=0.0,
+                    sigma=1.0,
+                    **kwargs,
+                )
+
+            else:
+                size = (n_outputs, X.shape[0]) if n_outputs > 1 else X.shape[0]
+                v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs)
+
+            f = pm.Deterministic(
+                name,
+                mu + cholesky(cov).dot(v.T).transpose(),
+                dims=kwargs.get("dims", None),
+            )
+
         else:
-            f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs)
+            mu_stack = pt.stack([mu] * n_outputs, axis=0) if n_outputs > 1 else mu
+            f = pm.MvNormal(name, mu=mu_stack, cov=cov, **kwargs)
+
         return f

-    def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs):
+    def prior(self, name, X, n_outputs=1, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs):
         R"""
         Returns the GP prior distribution evaluated over the input locations `X`.

@@ -178,6 +197,12 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs):
         X : array-like
             Function input values. If one-dimensional, must be a column vector
             with shape `(n, 1)`.
+        n_outputs : int, default 1
+            Number of output GPs. If you're using `dims`, make sure their size
+            is equal to `(n_outputs, X.shape[0])`, i.e. the number of output GPs
+            by the number of input points.
+            Example: `gp.prior("f", X=X, n_outputs=3, dims=("n_gps", "x_dim"))`,
+            where `len(n_gps) = 3` and `len(x_dim) = X.shape[0]`.
         reparameterize : bool, default True
             Reparameterize the distribution by rotating the random
             variable by the Cholesky factor of the covariance matrix.
@@ -188,10 +213,12 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs):
             Extra keyword arguments that are passed to :class:`~pymc.MvNormal`
             distribution constructor.
         """
+        f = self._build_prior(name, X, n_outputs, reparameterize, jitter, **kwargs)
-        f = self._build_prior(name, X, reparameterize, jitter, **kwargs)
         self.X = X
         self.f = f
+        self.n_outputs = n_outputs
+
         return f

     def _get_given_vals(self, given):
@@ -212,12 +239,16 @@ def _get_given_vals(self, given):
     def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter):
         Kxx = cov_total(X)
         Kxs = self.cov_func(X, Xnew)
+
         L = cholesky(stabilize(Kxx, jitter))
         A = solve_lower(L, Kxs)
-        v = solve_lower(L, f - mean_total(X))
-        mu = self.mean_func(Xnew) + pt.dot(pt.transpose(A), v)
+        v = solve_lower(L, (f - mean_total(X)).T)
+
+        mu = self.mean_func(Xnew) + pt.dot(pt.transpose(A), v).T
+
         Kss = self.cov_func(Xnew)
         cov = Kss - pt.dot(pt.transpose(A), A)
+
         return mu, cov

     def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs):
@@ -255,7 +286,9 @@ def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs):
         """
         givens = self._get_given_vals(given)
         mu, cov = self._build_conditional(Xnew, *givens, jitter)
-        return pm.MvNormal(name, mu=mu, cov=cov, **kwargs)
+        f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs)
+
+        return f


 @conditioned_vars(["X", "f", "nu"])
@@ -447,7 +480,15 @@ def _build_marginal_likelihood(self, X, noise_func, jitter):
         return mu, stabilize(cov, jitter)

     def marginal_likelihood(
-        self, name, X, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, is_observed=True, **kwargs
+        self,
+        name,
+        X,
+        y,
+        sigma=None,
+        noise=None,
+        jitter=JITTER_DEFAULT,
+        is_observed=True,
+        **kwargs,
     ):
         R"""
         Returns the marginal likelihood distribution, given the input
@@ -529,21 +570,28 @@ def _build_conditional(
         Kxs = self.cov_func(X, Xnew)
         Knx = noise_func(X)
         rxx = y - mean_total(X)
+
         L = cholesky(stabilize(Kxx, jitter) + Knx)
         A = solve_lower(L, Kxs)
-        v = solve_lower(L, rxx)
-        mu = self.mean_func(Xnew) + pt.dot(pt.transpose(A), v)
+        v = solve_lower(L, rxx.T)
+        mu = self.mean_func(Xnew) + pt.dot(pt.transpose(A), v).T
+
         if diag:
             Kss = self.cov_func(Xnew, diag=True)
             var = Kss - pt.sum(pt.square(A), 0)
+
             if pred_noise:
                 var += noise_func(Xnew, diag=True)
+
             return mu, var
+
         else:
             Kss = self.cov_func(Xnew)
             cov = Kss - pt.dot(pt.transpose(A), A)
+
             if pred_noise:
                 cov += noise_func(Xnew)
+
             return mu, cov if pred_noise else stabilize(cov, jitter)

     def conditional(
diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py
index 760262276a0..f1adf331ddb 100644
--- a/pymc/gp/hsgp_approx.py
+++ b/pymc/gp/hsgp_approx.py
@@ -442,18 +442,23 @@ def prior(
             Dimension name for the GP random variable.
         """
         phi, sqrt_psd = self.prior_linearized(X)
+        self._sqrt_psd = sqrt_psd

         if self._parametrization == "noncentered":
             self._beta = pm.Normal(
-                f"{name}_hsgp_coeffs_",
-                size=self._m_star - int(self._drop_first),
+                f"{name}_hsgp_coeffs",
+                size=self.n_basis_vectors - int(self._drop_first),
                 dims=hsgp_coeffs_dims,
             )
-            self._sqrt_psd = sqrt_psd
             f = self.mean_func(X) + phi @ (self._beta * self._sqrt_psd)

         elif self._parametrization == "centered":
-            self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd, dims=hsgp_coeffs_dims)
+            self._beta = pm.Normal(
+                f"{name}_hsgp_coeffs",
+                sigma=sqrt_psd,
+                size=self.n_basis_vectors - int(self._drop_first),
+                dims=hsgp_coeffs_dims,
+            )
             f = self.mean_func(X) + phi @ self._beta

         self.f = pm.Deterministic(name, f, dims=gp_dims)
diff --git a/tests/gp/test_gp.py b/tests/gp/test_gp.py
index 9e74cb443a7..9265dd415f0 100644
--- a/tests/gp/test_gp.py
+++ b/tests/gp/test_gp.py
@@ -17,6 +17,7 @@
 import numpy as np
 import numpy.testing as npt
+import pytensor.tensor as pt
 import pytest

 import pymc as pm

@@ -90,7 +91,12 @@ def test_raise_value_error(self):
         with self.model:
             with pytest.raises(ValueError):
                 self.gp.marginal_likelihood(
-                    "like_both", X=self.x, Xu=self.xu, y=self.y, noise=self.sigma, sigma=self.sigma
+                    "like_both",
+                    X=self.x,
+                    Xu=self.xu,
+                    y=self.y,
+                    noise=self.sigma,
+                    sigma=self.sigma,
                 )

             with pytest.raises(ValueError):
@@ -177,7 +183,11 @@ def setup_method(self):
             pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
             pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]),
         )
-        self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5))
+        self.means = (
+            pm.gp.mean.Constant(0.5),
+            pm.gp.mean.Constant(0.5),
+            pm.gp.mean.Constant(0.5),
+        )

     def testAdditiveMarginal(self):
         with pm.Model() as model1:
@@ -199,7 +209,9 @@ def testAdditiveMarginal(self):

         with model1:
             fp1 = gpsum.conditional(
-                "fp1", self.Xnew, given={"X": self.X, "y": self.y, "sigma": self.noise, "gp": gpsum}
+                "fp1",
+                self.Xnew,
+                given={"X": self.X, "y": self.y, "sigma": self.noise, "gp": gpsum},
             )
         with model2:
             fp2 = gptot.conditional("fp2", self.Xnew)
@@ -230,7 +242,9 @@ def testAdditiveMarginalApprox(self, approx):

         with pm.Model() as model2:
             gptot = pm.gp.MarginalApprox(
-                mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs), approx=approx
+                mean_func=reduce(add, self.means),
+                cov_func=reduce(add, self.covs),
+                approx=approx,
             )
             fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, sigma=sigma)
             model2_logp = model2.compile_logp()({})
@@ -352,6 +366,53 @@ def testLatent2(self):
         latent_logp = model.compile_logp()({"f_rotated_": y_rotated, "p": self.pnew})
         npt.assert_allclose(latent_logp, self.logp, atol=5)

+    def testLatentMultioutput(self):
+        n_outputs = 2
+        X = np.random.randn(20, 3)
+        y = np.random.randn(n_outputs, 20)
+        Xnew = np.random.randn(30, 3)
+        pnew = np.random.randn(n_outputs, 30)
+
+        with pm.Model() as latent_model:
+            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
+            mean_func = pm.gp.mean.Constant(0.5)
+            latent_gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func)
+            latent_f = latent_gp.prior("f", X, n_outputs=n_outputs, reparameterize=True)
+            latent_p = latent_gp.conditional("p", Xnew)
+
+        with pm.Model() as marginal_model:
+            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
+            mean_func = pm.gp.mean.Constant(0.5)
+            marginal_gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
+            marginal_f = marginal_gp.marginal_likelihood("f", X, y, sigma=0.0)
+            marginal_p = marginal_gp.conditional("p", Xnew)
+
+        assert tuple(latent_f.shape.eval()) == tuple(marginal_f.shape.eval()) == y.shape
+        assert tuple(latent_p.shape.eval()) == tuple(marginal_p.shape.eval()) == pnew.shape
+
+        chol = np.linalg.cholesky(cov_func(X).eval())
+        v = np.linalg.solve(chol, (y - 0.5).T)
+        A = np.linalg.solve(chol, cov_func(X, Xnew).eval()).T
+        mu_cond = mean_func(Xnew).eval() + (A @ v).T
+        cov_cond = cov_func(Xnew, Xnew).eval() - A @ A.T
+
+        with pm.Model() as numpy_model:
+            numpy_p = pm.MvNormal.dist(mu=pt.as_tensor(mu_cond), cov=pt.as_tensor(cov_cond))
+
+        latent_rv_logp = pm.logp(latent_p, pnew)
+        marginal_rv_logp = pm.logp(marginal_p, pnew)
+        numpy_rv_logp = pm.logp(numpy_p, pnew)
+
+        assert (
+            latent_rv_logp.shape.eval()
+            == marginal_rv_logp.shape.eval()
+            == numpy_rv_logp.shape.eval()
+        )
+
+        npt.assert_allclose(latent_rv_logp.eval(), marginal_rv_logp.eval(), atol=5)
+        npt.assert_allclose(latent_rv_logp.eval(), numpy_rv_logp.eval(), atol=5)
+        npt.assert_allclose(marginal_rv_logp.eval(), numpy_rv_logp.eval(), atol=5)
+

 class TestTP:
     R"""
@@ -486,7 +547,11 @@ def setup_method(self):
         self.X = cartesian(*self.Xs)
         self.N = np.prod([len(X) for X in self.Xs])
         self.y = np.random.randn(self.N) * 0.1
-        self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1))
+        self.Xnews = (
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+            np.random.randn(5, 1),
+        )
         self.Xnew = np.concatenate(self.Xnews, axis=1)
         self.sigma = 0.2
         self.pnew = np.random.randn(len(self.Xnew))
diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
index 5033aa82608..db03c8b8bcf 100644
--- a/tests/gp/test_hsgp_approx.py
+++ b/tests/gp/test_hsgp_approx.py
@@ -186,7 +186,7 @@ def test_parametrization_drop_first(self, model, cov_func, X1, drop_first):
             gp = pm.gp.HSGP(m=[n_basis], c=4.0, cov_func=cov_func, drop_first=drop_first)
             gp.prior("f1", X1)

-            n_coeffs = model.f1_hsgp_coeffs_.type.shape[0]
+            n_coeffs = model.f1_hsgp_coeffs.type.shape[0]
             if drop_first:
                 assert (
                     n_coeffs == n_basis - 1
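
Usage sketch (not part of the patch): the multi-output API added above could be exercised roughly as follows. This is a minimal illustration under assumed inputs; the array sizes, coordinate names ("n_gps", "x_dim"), and lengthscale are made up, while the calls themselves (pm.gp.Latent.prior with n_outputs and dims, and conditional) follow the new docstring, which gives the prior the shape (n_outputs, X.shape[0]).

# Illustrative only: variable names and sizes below are hypothetical, not from the diff.
import numpy as np
import pymc as pm

X = np.random.randn(50, 1)      # 50 input points, 1 feature
Xnew = np.random.randn(10, 1)   # 10 prediction points

coords = {"n_gps": np.arange(3), "x_dim": np.arange(50)}
with pm.Model(coords=coords) as model:
    cov_func = pm.gp.cov.ExpQuad(1, ls=0.5)
    gp = pm.gp.Latent(cov_func=cov_func)

    # Three output GPs sharing one covariance; per the new docstring, the dims
    # sizes must match (n_outputs, X.shape[0]), here (3, 50).
    f = gp.prior("f", X=X, n_outputs=3, dims=("n_gps", "x_dim"))

    # The conditional keeps the leading output dimension: shape (3, 10).
    fnew = gp.conditional("fnew", Xnew)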