From b2b8ae2cbd2d9b48f7237188aeff2a8d59421826 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 16 Sep 2020 17:10:49 -0400 Subject: [PATCH 01/29] starting to implement kron terms --- python/celerite2/kron.py | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 python/celerite2/kron.py diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py new file mode 100644 index 0000000..b63b5ff --- /dev/null +++ b/python/celerite2/kron.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from .terms import Term + + +class KronTerm(Term): + def __init__(self, term, *, R): + self.term = term + self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + self.M = len(self.R) + if self.R.ndim != 2 or self.R.shape != (self.M, self.M): + raise ValueError( + "R must be a square matrix; " + "use a LowRankKronTerm for the low rank model" + ) + + def __add__(self, b): + return KronTermSum(self, b) + + def __mul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def get_coefficients(self): + raise ValueError("KronTerm objects don't provide coefficients") + + def get_celerite_matrices( + self, x, diag, *, a=None, U=None, V=None, P=None + ): + # Check the input dimensions + x = np.atleast_1d(x) + diag = np.atleast_1d(diag) + N = len(diag) + if N != len(x) * self.M: + raise ValueError("'diag' must have the shape 'N * M'") + + # Compute the kernel for the celerite subproblem + a_sub, U_sub, V_sub, _ = self.term.get_celerite_matrices( + x, np.zeros_like(x) + ) + J = U_sub.shape[1] + + # Allocate memory as requested + if a is None: + a = np.empty(N) + else: + a.resize(N, refcheck=False) + if U is None: + U = np.empty((N, J)) + else: + U.resize((N, J), refcheck=False) + if V is None: + V = np.empty((N, J)) + else: + V.resize((N, J), refcheck=False) + if P is None: + P = np.empty((N - 1, J)) + else: + P.resize((N - 1, J), refcheck=False) + + # Expand the times appropriately + x_full = np.repeat(x, self.M) + dx = x_full[1:] - x_full[:-1] + a[:] = diag + + a[:] = diag + tt.diag(self.R)[:, None] * (tt.sum(ar) + tt.sum(ac)) + + # a = tt.reshape(a.T, (1, a.size))[0] + # U = tt.slinalg.kron(U, self.R) + # V = tt.slinalg.kron(V, tt.eye(self.R.shape[0])) + # c = tt.concatenate((cr, cc, cc)) + # P = tt.exp(-c[None, :] * dx[:, None]) + # P = tt.tile(P, (1, self.R.shape[0])) + + +class LowRankKronTerm(KronTerm): + pass + + +class KronTermSum(Term): + pass From c252a17ebcb52ecd37846fa4bf58f6301491fc42 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 16 Sep 2020 19:31:59 -0400 Subject: [PATCH 02/29] dealing with shapes --- python/celerite2/celerite2.py | 15 ++++++++----- python/celerite2/kron.py | 40 +++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 44fb0b6..03ed52f 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -38,6 +38,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._size = None self._log_det = -np.inf self._norm = np.inf @@ -103,7 +104,11 @@ def compute( # Save the diagonal self._t = np.ascontiguousarray(t, dtype=np.float64) self._mean_value = self._mean(self._t) - self._diag = np.empty_like(self._t) + try: + self._diag = np.empty((len(self._t), self.kernel.M)) + except AttributeError: + self._diag = np.empty_like(self._t) + self._size = self._diag.size if yerr is None and 
diag is None: self._diag[:] = 0.0 @@ -169,9 +174,9 @@ def _process_input(self, y, *, inplace=False, require_vector=False): y = np.atleast_1d(y) if self._t is None: raise RuntimeError("you must call 'compute' first") - if self._t.shape[0] != y.shape[0]: + if self._size != y.shape[0]: raise ValueError("dimension mismatch") - if require_vector and self._t.shape != y.shape: + if require_vector and (self._size,) != y.shape: raise ValueError("'y' must be one dimensional") if inplace: if ( @@ -386,9 +391,9 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(len(self._t)) + n = np.random.randn(self._size) else: - n = np.random.randn(len(self._t), size) + n = np.random.randn(self._size, size) result = self.dot_tril(n, inplace=True).T if include_mean: result += self._mean(self._t) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index b63b5ff..a4c2430 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -31,16 +31,19 @@ def get_celerite_matrices( ): # Check the input dimensions x = np.atleast_1d(x) - diag = np.atleast_1d(diag) - N = len(diag) - if N != len(x) * self.M: - raise ValueError("'diag' must have the shape 'N * M'") - - # Compute the kernel for the celerite subproblem - a_sub, U_sub, V_sub, _ = self.term.get_celerite_matrices( + diag = np.atleast_2d(diag) + N0, M = diag.shape + if len(x) != N0 or M != self.M: + raise ValueError("'diag' must have the shape (N, M)") + N = N0 * M + + # Compute the coefficients and kernel for the celerite subproblem + ar, cr, ac, _, cc, _ = self.term.get_coefficients() + _, U_sub, V_sub, _ = self.term.get_celerite_matrices( x, np.zeros_like(x) ) - J = U_sub.shape[1] + J0 = U_sub.shape[1] + J = J0 * self.M # Allocate memory as requested if a is None: @@ -63,16 +66,21 @@ def get_celerite_matrices( # Expand the times appropriately x_full = np.repeat(x, self.M) dx = x_full[1:] - x_full[:-1] - a[:] = diag - a[:] = diag + tt.diag(self.R)[:, None] * (tt.sum(ar) + tt.sum(ac)) + a[:] = ( + diag + np.diag(self.R)[None, :] * (np.sum(ar) + np.sum(ac)) + ).flatten() + U[:] = np.kron(U_sub, self.R) + V[:] = np.kron(V_sub, np.eye(self.M)) + + c = np.concatenate((cr, cc, cc)) + P[:, :J0] = np.exp(-c[None, :] * dx[:, None]) + P[:, J0:] = np.repeat(P[:, :J0], self.M - 1, axis=1) + + return a, U, V, P - # a = tt.reshape(a.T, (1, a.size))[0] - # U = tt.slinalg.kron(U, self.R) - # V = tt.slinalg.kron(V, tt.eye(self.R.shape[0])) - # c = tt.concatenate((cr, cc, cc)) - # P = tt.exp(-c[None, :] * dx[:, None]) - # P = tt.tile(P, (1, self.R.shape[0])) + def get_value(self, tau): + return np.kron(self.term.get_value(tau), self.R) class LowRankKronTerm(KronTerm): From bc93ea77760f4944257c8b72be489bb867ae8499 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 12:04:52 -0400 Subject: [PATCH 03/29] fixing order of columns in kron term --- python/celerite2/kron.py | 84 +++++++++++++++++++++++++++++++++++----- python/test/test_kron.py | 35 +++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 python/test/test_kron.py diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index a4c2430..d5c8122 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- + +__all__ = ["KronTerm"] + import numpy as np from .terms import Term @@ -16,7 +19,7 @@ def __init__(self, term, *, R): ) def __add__(self, b): - return KronTermSum(self, b) + raise 
NotImplementedError("addition is not implemented for KronTerm") def __mul__(self, b): raise NotImplementedError( @@ -24,14 +27,68 @@ def __mul__(self, b): ) def get_coefficients(self): - raise ValueError("KronTerm objects don't provide coefficients") + raise ValueError( + "KronTerm objects don't provide coefficients and they can't be " + "used in operations with other celerite2 terms" + ) + + def get_value(self, tau): + """Compute the value of the kernel as a function of lag + + Args: + tau (shape[...]): The lags where the kernel should be evaluated. + """ + return np.kron(self.term.get_value(tau), self.R) + + def to_dense(self, x, diag): + """Evaluate the dense covariance matrix for this term + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N, M]): The diagonal variance of the system. + """ + K = self.get_value(x[:, None] - x[None, :]) + K[np.diag_indices_from(K)] += np.ascontiguousarray(diag).flatten() + return K + + def get_psd(self, omega): + """Compute the value of the power spectral density for this process + + For Kronecker terms, the PSD computed is actually for the base term. + + Args: + omega (shape[...]): The (angular) frequencies where the power + should be evaluated. + """ + return self.term.get_psd(omega) def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): + """Get the matrices needed to solve the celerite system + + Pre-allocated arrays can be provided to the Python interface to be + re-used for multiple evaluations. + + .. note:: In-place operations are not supported by the modeling + extensions. + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N, M]): The diagonal variance of the system. + a (shape[N*M], optional): The diagonal of the A matrix. + U (shape[N*M, J*M], optional): The first low-rank matrix. + V (shape[N*M, J*M], optional): The second low-rank matrix. + P (shape[N*M-1, J], optional): The regularization matrix used for + numerical stability. + + Raises: + ValueError: When the inputs are not valid. + """ + # Check the input dimensions - x = np.atleast_1d(x) - diag = np.atleast_2d(diag) + x = np.ascontiguousarray(np.atleast_1d(x)) + diag = np.ascontiguousarray(np.atleast_2d(diag)) N0, M = diag.shape if len(x) != N0 or M != self.M: raise ValueError("'diag' must have the shape (N, M)") @@ -64,7 +121,7 @@ def get_celerite_matrices( P.resize((N - 1, J), refcheck=False) # Expand the times appropriately - x_full = np.repeat(x, self.M) + x_full = np.tile(x[:, None], (1, self.M)).flatten() dx = x_full[1:] - x_full[:-1] a[:] = ( @@ -74,13 +131,22 @@ def get_celerite_matrices( V[:] = np.kron(V_sub, np.eye(self.M)) c = np.concatenate((cr, cc, cc)) - P[:, :J0] = np.exp(-c[None, :] * dx[:, None]) - P[:, J0:] = np.repeat(P[:, :J0], self.M - 1, axis=1) + P0 = np.exp(-c[None, :] * dx[:, None]) + P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape((-1, J)) return a, U, V, P - def get_value(self, tau): - return np.kron(self.term.get_value(tau), self.R) + def get_conditional_mean_matrices(self, x, t): + """Get the matrices needed to compute the conditional mean function + + Args: + x (shape[N]): The independent coordinates of the data. + t (shape[M]): The independent coordinates where the predictions + will be made. + """ + raise NotImplementedError( + "Conditional mean matrices have not (yet!) 
been implemented" + ) class LowRankKronTerm(KronTerm): diff --git a/python/test/test_kron.py b/python/test/test_kron.py new file mode 100644 index 0000000..8f2edd1 --- /dev/null +++ b/python/test/test_kron.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from celerite2 import kron, terms + + +def test_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M) + eye = np.eye(N * M) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2 * M) + assert V.shape == (N * M, 2 * M) + assert P.shape == (N * M - 1, 2 * M) + + tau = x - x[0] + K = term.get_value(tau[:, None] - tau[None, :]) + K[np.diag_indices_from(K)] += diag.flatten() + K0 = term.dot(x, diag, eye) + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) From 1a868877219a0547e0d7812f94c108881d358b39 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 13:47:53 -0400 Subject: [PATCH 04/29] caching theano directory --- .github/workflows/python.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e24d5c3..c1550a9 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,6 +73,12 @@ jobs: with: python-version: 3.8 auto-update-conda: true + - name: "Cache ~/.theano" + uses: actions/cache@v2 + with: + path: ~/.theano + key: theano-${{ hashFiles('python/test/theano/*.py') }} + restore-keys: theano- - name: Install dependencies shell: bash -l {0} run: | From c122d8c624f84085feec6fea76b6eec637ddc143 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 13:54:06 -0400 Subject: [PATCH 05/29] cache based on compiledir --- .github/workflows/python.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c1550a9..e20ba51 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,18 +73,24 @@ jobs: with: python-version: 3.8 auto-update-conda: true - - name: "Cache ~/.theano" - uses: actions/cache@v2 - with: - path: ~/.theano - key: theano-${{ hashFiles('python/test/theano/*.py') }} - restore-keys: theano- - name: Install dependencies shell: bash -l {0} run: | conda install -q numpy scipy theano mkl-service python -m pip install -U pip python -m pip install --use-feature=2020-resolver -e ".[test,theano]" + - name: Get theano compiledir + id: compiledir + run: | + python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" + - name: "Cache ~/.theano" + uses: actions/cache@v2 + with: + path: ~/.theano + key: theano-${{ steps.compiledir.outputs.compiledir }}-${{ hashFiles('python/test/theano/*.py') }} + restore-keys: | + theano-${{ steps.compiledir.outputs.compiledir }}- + theano- - name: Run the unit tests shell: bash -l {0} run: python -m pytest --cov celerite2 python/test/theano From 93888d8e07cd2f21588767166c04108e2f77649e Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 14:00:27 -0400 Subject: [PATCH 06/29] forgotten shell command --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff 
--git a/.github/workflows/python.yml b/.github/workflows/python.yml index e20ba51..421a2f6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -81,6 +81,7 @@ jobs: python -m pip install --use-feature=2020-resolver -e ".[test,theano]" - name: Get theano compiledir id: compiledir + shell: bash -l {0} run: | python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" - name: "Cache ~/.theano" From a7a4791ce69716f3c4b536ba2fba76b97dddae95 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 14:30:29 -0400 Subject: [PATCH 07/29] adding implementation of low rank kron --- python/celerite2/kron.py | 54 ++++++++++++++++++++++++++++++++++------ python/test/test_kron.py | 45 +++++++++++++++++++++++++++------ 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index d5c8122..d37be0d 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -17,6 +17,7 @@ def __init__(self, term, *, R): "R must be a square matrix; " "use a LowRankKronTerm for the low rank model" ) + self.alpha2 = np.diag(self.R) def __add__(self, b): raise NotImplementedError("addition is not implemented for KronTerm") @@ -79,7 +80,7 @@ def get_celerite_matrices( a (shape[N*M], optional): The diagonal of the A matrix. U (shape[N*M, J*M], optional): The first low-rank matrix. V (shape[N*M, J*M], optional): The second low-rank matrix. - P (shape[N*M-1, J], optional): The regularization matrix used for + P (shape[N*M-1, J*M], optional): The regularization matrix used for numerical stability. Raises: @@ -100,7 +101,7 @@ def get_celerite_matrices( x, np.zeros_like(x) ) J0 = U_sub.shape[1] - J = J0 * self.M + J = self._get_J(J0) # Allocate memory as requested if a is None: @@ -125,14 +126,14 @@ def get_celerite_matrices( dx = x_full[1:] - x_full[:-1] a[:] = ( - diag + np.diag(self.R)[None, :] * (np.sum(ar) + np.sum(ac)) + diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) ).flatten() - U[:] = np.kron(U_sub, self.R) - V[:] = np.kron(V_sub, np.eye(self.M)) + U[:] = np.kron(U_sub, self._get_U_kron()) + V[:] = np.kron(V_sub, self._get_V_kron()) c = np.concatenate((cr, cc, cc)) - P0 = np.exp(-c[None, :] * dx[:, None]) - P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape((-1, J)) + P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) + self._copy_P(P0, P) return a, U, V, P @@ -148,9 +149,46 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) 
been implemented" ) + # The following should be implemented by subclasses + def _get_J(self, J0): + return self.M * J0 + + def _get_U_kron(self): + return self.R + + def _get_V_kron(self): + return np.eye(self.M) + + def _copy_P(self, P0, P): + P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape(P.shape) + class LowRankKronTerm(KronTerm): - pass + def __init__(self, term, *, alpha): + self.term = term + self.alpha = np.ascontiguousarray( + np.atleast_1d(alpha), dtype=np.float64 + ) + if self.alpha.ndim != 1: + raise ValueError( + "alpha must be a vector; " + "use a general KronTerm for a full rank model" + ) + self.M = len(self.alpha) + self.R = np.outer(self.alpha, self.alpha) + self.alpha2 = self.alpha ** 2 + + def _get_J(self, J0): + return J0 + + def _get_U_kron(self): + return self.alpha[:, None] + + def _get_V_kron(self): + return self.alpha[:, None] + + def _copy_P(self, P0, P): + pass class KronTermSum(Term): diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 8f2edd1..2c8e248 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -4,6 +4,15 @@ from celerite2 import kron, terms +def check_value(term, x, diag, y): + tau = x - x[0] + K = term.get_value(tau[:, None] - tau[None, :]) + K[np.diag_indices_from(K)] += diag.flatten() + K0 = term.dot(x, diag, np.eye(diag.size)) + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + + def test_value(): N = 100 M = 5 @@ -11,8 +20,7 @@ def test_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M) - eye = np.eye(N * M) + y = np.random.randn(N * M, 3) R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -21,15 +29,36 @@ def test_value(): term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) term = kron.KronTerm(term0, R=R) + a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) assert U.shape == (N * M, 2 * M) assert V.shape == (N * M, 2 * M) assert P.shape == (N * M - 1, 2 * M) - tau = x - x[0] - K = term.get_value(tau[:, None] - tau[None, :]) - K[np.diag_indices_from(K)] += diag.flatten() - K0 = term.dot(x, diag, eye) - assert np.allclose(K, K0) - assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + check_value(term, x, diag, y) + + +def test_low_rank_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2) + assert V.shape == (N * M, 2) + assert P.shape == (N * M - 1, 2) + + check_value(term, x, diag, y) + + full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) + assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) From 12e3883f189fe3d4133161a9375428545a6d1e7d Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 16:08:22 -0400 Subject: [PATCH 08/29] implementing kron term sum --- python/celerite2/kron.py | 31 ++++- python/celerite2/terms.py | 261 ++++++++++++++++++++++++++++++++------ python/test/test_kron.py | 27 ++++ python/test/test_terms.py | 64 ++++++++-- 4 files changed, 326 insertions(+), 57 deletions(-) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index d37be0d..5284bdb 100644 --- a/python/celerite2/kron.py +++ 
b/python/celerite2/kron.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm"] +__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] import numpy as np -from .terms import Term +from .terms import Term, TermSumGeneral class KronTerm(Term): + __compat__ = "kronecker" + __requires_general_addition__ = True + def __init__(self, term, *, R): self.term = term self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) @@ -19,14 +22,29 @@ def __init__(self, term, *, R): ) self.alpha2 = np.diag(self.R) + def __len__(self): + return len(self.term) * self.M + def __add__(self, b): - raise NotImplementedError("addition is not implemented for KronTerm") + if b.__compat__ != self.__compat__: + raise TypeError("KronTerms can only be added to other KronTerms") + return KronTermSum(self, b) + + def __radd__(self, b): + if b.__compat__ != self.__compat__: + raise TypeError("KronTerms can only be added to other KronTerms") + return KronTermSum(b, self) def __mul__(self, b): raise NotImplementedError( "multiplication is not implemented for KronTerm" ) + def __rmul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + def get_coefficients(self): raise ValueError( "KronTerm objects don't provide coefficients and they can't be " @@ -178,6 +196,9 @@ def __init__(self, term, *, alpha): self.R = np.outer(self.alpha, self.alpha) self.alpha2 = self.alpha ** 2 + def __len__(self): + return len(self.term) + def _get_J(self, J0): return J0 @@ -191,5 +212,5 @@ def _copy_P(self, P0, P): pass -class KronTermSum(Term): - pass +class KronTermSum(TermSumGeneral): + __compat__ = "kronecker" diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 72f7d04..7d99b92 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -2,6 +2,7 @@ __all__ = [ "Term", + "TermSumGeneral", "TermSum", "TermProduct", "TermDiff", @@ -30,16 +31,61 @@ class Term: """ + __compat__ = None + __requires_general_addition__ = False + def __add__(self, b): + if self.__compat__ != b.__compat__: + raise TypeError("Incompatible terms") + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(self, b) return TermSum(self, b) def __mul__(self, b): + if self.__compat__ != b.__compat__: + raise TypeError("Incompatible terms") return TermProduct(self, b) + def __len__(self): + r, _, c, _, _, _ = self.get_coefficients() + return len(r) + 2 * len(c) + @property def terms(self): return [self] + def to_dense(self, x, diag): + """Evaluate the dense covariance matrix for this term + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + """ + K = self.get_value(x[:, None] - x[None, :]) + K[np.diag_indices_from(K)] += diag + return K + + def dot(self, x, diag, y): + """Apply a matrix-vector or matrix-matrix product + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + y (shape[N] or shape[N, K]): The target of vector or matrix for + this operation. 
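+
+        A minimal usage sketch (the kernel parameters and data below are
+        arbitrary placeholders, not taken from a real dataset)::
+
+            import numpy as np
+            from celerite2 import terms
+
+            term = terms.SHOTerm(sigma=1.0, rho=2.5, Q=3.0)
+            x = np.linspace(0.0, 10.0, 50)
+            diag = np.full_like(x, 0.1)
+            y = np.sin(x)
+
+            # Equivalent to np.dot(K, y) with K = term.get_value(x[:, None] - x[None, :])
+            # plus diag added to its diagonal, but with cost linear in N
+            z = term.dot(x, diag, y)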
+ """ + a, U, V, P = self.get_celerite_matrices(x, diag) + + y = np.atleast_1d(y) + if y.shape[0] != len(a): + raise ValueError("dimension mismatch") + + z = np.empty(y.shape) + return driver.matmul(a, U, V, P, y, z) + def get_coefficients(self): """Compute and return the coefficients for the celerite model @@ -103,17 +149,6 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd - def to_dense(self, x, diag): - """Evaluate the dense covariance matrix for this term - - Args: - x (shape[N]): The independent coordinates of the data. - diag (shape[N]): The diagonal variance of the system. - """ - K = self.get_value(x[:, None] - x[None, :]) - K[np.diag_indices_from(K)] += diag - return K - def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): @@ -195,24 +230,6 @@ def get_conditional_mean_matrices(self, x, t): return U_star, V_star, inds - def dot(self, x, diag, y): - """Apply a matrix-vector or matrix-matrix product - - Args: - x (shape[N]): The independent coordinates of the data. - diag (shape[N]): The diagonal variance of the system. - y (shape[N] or shape[N, K]): The target of vector or matrix for - this operation. - """ - a, U, V, P = self.get_celerite_matrices(x, diag) - - y = np.atleast_1d(y) - if y.shape[0] != len(a): - raise ValueError("dimension mismatch") - - z = np.empty(y.shape) - return driver.matmul(a, U, V, P, y, z) - class TermSum(Term): """A sum of multiple :class:`Term` objects @@ -226,10 +243,10 @@ class TermSum(Term): """ def __init__(self, *terms): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms @@ -242,6 +259,146 @@ def get_coefficients(self): return tuple(np.concatenate(c) for c in zip(*coeffs)) +class TermSumGeneral(Term): + """A general sum of multiple :class:`Term` objects + + This will be used if any of the terms require "general addition", + or if they don't provide a ``get_coefficients`` method. + + Args: + *terms: Any number of :class:`Term` subclasses to add together. + """ + + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + def __len__(self): + return sum(map(len, self.terms)) + + def get_value(self, tau): + """Compute the value of the kernel as a function of lag + + Args: + tau (shape[...]): The lags where the kernel should be evaluated. + """ + K = self.terms[0].get_value(tau) + for term in self.terms[1:]: + K += term.get_value(tau) + return K + + def get_psd(self, omega): + """Compute the value of the power spectral density for this process + + Args: + omega (shape[...]): The (angular) frequencies where the power + should be evaluated. 
+ """ + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices( + self, x, diag, *, a=None, U=None, V=None, P=None + ): + """Get the matrices needed to solve the celerite system + + Pre-allocated arrays can be provided to the Python interface to be + re-used for multiple evaluations. + + .. note:: In-place operations are not supported by the modeling + extensions. + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + a (shape[N], optional): The diagonal of the A matrix. + U (shape[N, J], optional): The first low-rank matrix. + V (shape[N, J], optional): The second low-rank matrix. + P (shape[N-1, J], optional): The regularization matrix used for + numerical stability. + + Raises: + ValueError: When the inputs are not valid. + """ + x = np.atleast_1d(x) + diag = np.atleast_1d(diag) + N = diag.size + sizes = [len(term) for term in self.terms] + J = sum(sizes) + + if a is None: + a = np.empty(N) + else: + a.resize(N, refcheck=False) + if U is None: + U = np.empty((N, J)) + else: + U.resize((N, J), refcheck=False) + if V is None: + V = np.empty((N, J)) + else: + V.resize((N, J), refcheck=False) + if P is None: + P = np.empty((N - 1, J)) + else: + P.resize((N - 1, J), refcheck=False) + + j = sizes[0] + _, U[:, :j], V[:, :j], P[:, :j] = self.terms[0].get_celerite_matrices( + x, + diag, + a=a, + ) + for dj, term in zip(sizes[1:], self.terms[1:]): + ( + da, + U[:, j : j + dj], + V[:, j : j + dj], + P[:, j : j + dj], + ) = term.get_celerite_matrices( + x, + np.zeros_like(diag), + ) + a[:] += da + j += dj + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + """Get the matrices needed to compute the conditional mean function + + Args: + x (shape[N]): The independent coordinates of the data. + t (shape[M]): The independent coordinates where the predictions + will be made. + """ + Us = [] + Vs = [] + for term in self.terms: + U, V, inds = term.get_conditional_mean_matrices(x, t) + Us.append(U) + Vs.append(V) + return np.concatenate(Us, axis=1), np.concatenate(Vs, axis=1), inds + + class TermProduct(Term): """A product of two :class:`Term` objects @@ -255,12 +412,13 @@ class TermProduct(Term): """ def __init__(self, term1, term2): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -316,13 +474,16 @@ class TermDiff(Term): """ def __init__(self, term): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term + def __len__(self): + return len(self.term) + def get_coefficients(self): coeffs = self.term.get_coefficients() a, b, c, d = coeffs[2:] @@ -350,10 +511,15 @@ class TermConvolution(Term): exposure time). 
""" + __requires_general_addition__ = True + def __init__(self, term, delta): self.term = term self.delta = float(delta) + def __len__(self): + return len(self.term) + def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): @@ -523,6 +689,9 @@ def __init__(self, *, a, c): self.a = float(a) self.c = float(c) + def __len__(self): + return 1 + def get_coefficients(self): e = np.empty(0) return np.array([self.a]), np.array([self.c]), e, e, e, e @@ -564,6 +733,9 @@ def __init__(self, *, a, b, c, d): self.c = float(c) self.d = float(d) + def __len__(self): + return 2 + def get_coefficients(self): e = np.empty(0) return ( @@ -658,6 +830,9 @@ def get_test_parameters(): def __init__(self, *, eps=1e-5): self.eps = float(eps) + def __len__(self): + return 2 + def overdamped(self): Q = self.Q f = np.sqrt(np.maximum(1.0 - 4.0 * Q ** 2, self.eps)) @@ -734,6 +909,9 @@ def __init__(self, *, sigma, rho, eps=0.01): self.rho = float(rho) self.eps = float(eps) + def __len__(self): + return 2 + def get_coefficients(self): w0 = np.sqrt(3) / self.rho S0 = self.sigma ** 2 / w0 @@ -814,6 +992,9 @@ def __init__(self, *, sigma, period, Q0, dQ, f): SHOTerm(S0=S1, w0=w1, Q=Q1), SHOTerm(S0=S2, w0=w2, Q=Q2) ) + def __len__(self): + return 4 + class OriginalCeleriteTerm(Term): """A wrapper around terms defined using the original celerite package diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 2c8e248..303eb18 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -62,3 +62,30 @@ def test_low_rank_value(): full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) + + +def test_sum_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2 * M + 2) + assert V.shape == (N * M, 2 * M + 2) + assert P.shape == (N * M - 1, 2 * M + 2) + + check_value(term, x, diag, y) diff --git a/python/test/test_terms.py b/python/test/test_terms.py index 4ec2772..28c87bf 100644 --- a/python/test/test_terms.py +++ b/python/test/test_terms.py @@ -78,18 +78,7 @@ def _convert_kernel(celerite_kernel): raise NotImplementedError() -@pytest.mark.parametrize("oterm", test_terms) -def test_consistency(oterm): - # Check that the coefficients are all correct - term = _convert_kernel(oterm) - for v1, v2 in zip(oterm.get_all_coefficients(), term.get_coefficients()): - assert np.allclose(v1, v2) - for v1, v2 in zip( - terms.OriginalCeleriteTerm(oterm).get_coefficients(), - term.get_coefficients(), - ): - assert np.allclose(v1, v2) - +def check_term(oterm, term): # Make sure that the covariance matrix is right np.random.seed(40582) x = np.sort(np.random.uniform(0, 10, 50)) @@ -117,3 +106,54 @@ def test_consistency(oterm): y = np.vstack([x]).T value = term.dot(x, diag, y) assert np.allclose(value, np.dot(K, y)) + + +@pytest.mark.parametrize("oterm", test_terms) +def test_consistency(oterm): + # Check that the coefficients are all correct + term = _convert_kernel(oterm) + for v1, v2 in 
zip(oterm.get_all_coefficients(), term.get_coefficients()): + assert np.allclose(v1, v2) + for v1, v2 in zip( + terms.OriginalCeleriteTerm(oterm).get_coefficients(), + term.get_coefficients(), + ): + assert np.allclose(v1, v2) + + check_term(oterm, term) + + +def test_general_sum(): + term1 = terms.SHOTerm(sigma=1.0, rho=0.3, Q=0.1) + term2 = terms.SHOTerm(sigma=1.5, rho=3.3, Q=0.5) + term3 = terms.SHOTerm(sigma=0.3, rho=1.5, Q=2.0) + oterm = term1 + term2 + term3 + + term2.__requires_general_addition__ = True + term3.__requires_general_addition__ = True + term = term1 + term2 + term3 + assert isinstance(oterm, terms.TermSum) + assert isinstance(term, terms.TermSumGeneral) + + np.random.seed(40582) + x = np.sort(np.random.uniform(0, 10, 50)) + t = np.sort(np.random.uniform(-1, 11, 100)) + diag = np.random.uniform(0.1, 0.3, len(x)) + + for n, (a, b) in enumerate( + zip( + oterm.get_celerite_matrices(x, diag), + term.get_celerite_matrices(x, diag), + ) + ): + assert np.allclose(a, b) + + for n, (a, b) in enumerate( + zip( + oterm.get_conditional_mean_matrices(x, t), + term.get_conditional_mean_matrices(x, t), + ) + ): + assert np.allclose(a, b) + + check_term(oterm, term) From 3289b493825be589148c663cb0b857bda3df9819 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:24:21 -0400 Subject: [PATCH 09/29] skipping termsumgeneral --- python/test/theano/test_terms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index 1fbcb8a..51da7d5 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -27,7 +27,7 @@ def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: - if name == "OriginalCeleriteTerm": + if name == "OriginalCeleriteTerm" or name == "TermSumGeneral": continue term = getattr(terms, name) if name.startswith("Term"): From 303bdf1c1d613f6b3a9b2b54774ab275dc88a610 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:25:29 -0400 Subject: [PATCH 10/29] xfail --- python/test/theano/test_terms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index 51da7d5..ebe4f50 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -24,10 +24,11 @@ compare_terms = partial(check_tensor_term, lambda x: x.eval()) +@pytest.mark.xfail(reason="TermSumGeneral not yet implemented") def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: - if name == "OriginalCeleriteTerm" or name == "TermSumGeneral": + if name == "OriginalCeleriteTerm": continue term = getattr(terms, name) if name.startswith("Term"): From 6be5390f7b13bf6e109878153dcf01110301e7d6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:26:45 -0400 Subject: [PATCH 11/29] cache theano for tutorials too --- .github/workflows/tutorials.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index e93ff60..2f16822 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,6 +29,20 @@ jobs: python -m pip install -U pip python -m pip install --use-feature=2020-resolver ".[tutorials]" + - name: Get theano compiledir + id: compiledir + run: | + python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" + + - name: "Cache ~/.theano" + uses: actions/cache@v2 
+ with: + path: ~/.theano + key: tutorials-${{ steps.compiledir.outputs.compiledir }}-${{ hashFiles('docs/tutorials/*.py') }} + restore-keys: | + tutorials-${{ steps.compiledir.outputs.compiledir }}- + tutorials- + - name: Execute the notebooks run: | jupytext --to ipynb --execute docs/tutorials/*.py From c746b9f83221e71ffaf9ef1188916d5cc0600d70 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 15 Oct 2020 13:49:30 -0400 Subject: [PATCH 12/29] handling latent dimensions generally --- python/celerite2/celerite2.py | 133 +++++++++++++++++++++++----------- python/celerite2/kron.py | 21 ++++-- python/celerite2/terms.py | 13 ++-- python/test/test_celerite2.py | 3 + 4 files changed, 118 insertions(+), 52 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 03ed52f..4b86bf7 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -65,7 +65,7 @@ def mean(self, mean): @property def mean_value(self): if self._mean_value is None: - raise AttributeError( + raise RuntimeError( "'compute' must be executed before accessing mean_value" ) return self._mean_value @@ -97,30 +97,45 @@ def compute( # Check the input coordinates t = np.atleast_1d(t) if check_sorted and np.any(np.diff(t) < 0.0): - raise ValueError("the input coordinates must be sorted") + raise ValueError("The input coordinates must be sorted") if len(t.shape) != 1: - raise ValueError("the input coordinates must be one dimensional") + raise ValueError("The input coordinates must be one dimensional") # Save the diagonal self._t = np.ascontiguousarray(t, dtype=np.float64) + if self._t.ndim != 1: + raise ValueError("Input 't' must be 1D") self._mean_value = self._mean(self._t) - try: - self._diag = np.empty((len(self._t), self.kernel.M)) - except AttributeError: - self._diag = np.empty_like(self._t) + + # Check the dimensions + N = len(self._t) + M = self.kernel.dimension + if M: + self._shape = (N, M) + else: + self._shape = (N,) + + self._diag = np.empty(self._shape) self._size = self._diag.size if yerr is None and diag is None: self._diag[:] = 0.0 - elif yerr is not None: - if diag is not None: + else: + if yerr is not None: + if diag is not None: + raise ValueError( + "Only one of 'diag' and 'yerr' can be provided" + ) + diag = np.atleast_1d(yerr) ** 2 + else: + diag = np.atleast_1d(diag) + + if diag.shape != self._shape: raise ValueError( - "only one of 'diag' and 'yerr' can be provided" + f"Invalid shape for 'diag'; expected {self._shape}, got " + f"{diag.shape}" ) - self._diag[:] = np.atleast_1d(yerr) ** 2 - - else: - self._diag[:] = np.atleast_1d(diag) + self._diag[:] = diag # Fill the celerite matrices ( @@ -145,7 +160,7 @@ def compute( else: self._log_det = np.sum(np.log(self._d)) self._norm = -0.5 * ( - self._log_det + len(self._t) * np.log(2 * np.pi) + self._log_det + self._size * np.log(2 * np.pi) ) def recompute(self, *, quiet=False): @@ -164,20 +179,17 @@ def recompute(self, *, quiet=False): """ if self._t is None: raise RuntimeError( - "you must call 'compute' directly at least once" + "You must call 'compute' directly at least once" ) return self.compute( self._t, diag=self._diag, check_sorted=False, quiet=quiet ) def _process_input(self, y, *, inplace=False, require_vector=False): - y = np.atleast_1d(y) if self._t is None: - raise RuntimeError("you must call 'compute' first") - if self._size != y.shape[0]: - raise ValueError("dimension mismatch") - if require_vector and (self._size,) != y.shape: - raise ValueError("'y' must be one dimensional") + raise 
RuntimeError("You must call 'compute' first") + + y = np.atleast_1d(y) if inplace: if ( y.dtype != "float64" @@ -191,8 +203,26 @@ def _process_input(self, y, *, inplace=False, require_vector=False): y = np.ascontiguousarray(y, dtype=np.float64) else: y = np.array(y, dtype=np.float64, copy=True, order="C") + + if self._shape != y.shape[: len(self._shape)]: + raise ValueError( + f"Dimension mismatch; expected {self._shape}..., " + f"got {y.shape[:len(self._shape)]}..." + ) + + if len(self._shape) == 2: + y = np.reshape(y, (self._size,) + y.shape[2:]) + + if require_vector and (self._size,) != y.shape: + raise ValueError("'y' must be one dimensional") + return y + def _reshape_output(self, y): + if len(self._shape) == 1: + return y + return np.reshape(y, (-1, self._shape[1]) + y.shape[1:]) + def apply_inverse(self, y, *, inplace=False): """Apply the inverse of the covariance matrix to a vector or matrix @@ -213,7 +243,9 @@ def apply_inverse(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). """ y = self._process_input(y, inplace=inplace) - return driver.solve(self._U, self._P, self._d, self._W, y) + return self._reshape_output( + driver.solve(self._U, self._P, self._d, self._W, y) + ) def dot_tril(self, y, *, inplace=False): """Dot the Cholesky factor of the GP system into a vector or matrix @@ -235,7 +267,9 @@ def dot_tril(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). """ y = self._process_input(y, inplace=inplace) - return driver.dot_tril(self._U, self._P, self._d, self._W, y) + return self._reshape_output( + driver.dot_tril(self._U, self._P, self._d, self._W, y) + ) def log_likelihood(self, y, *, inplace=False): """Compute the marginalized likelihood of the GP model @@ -257,11 +291,13 @@ def log_likelihood(self, y, *, inplace=False): first. ValueError: When the inputs are not valid (shape, number, etc.). """ - y = self._process_input(y, inplace=inplace, require_vector=True) + y = self._process_input( + y - self.mean_value, inplace=inplace, require_vector=True + ) if not np.isfinite(self._log_det): return -np.inf loglike = self._norm - 0.5 * driver.norm( - self._U, self._P, self._d, self._W, y - self._mean(self._t) + self._U, self._P, self._d, self._W, y ) if not np.isfinite(loglike): return -np.inf @@ -307,11 +343,11 @@ def predict( first. ValueError: When the inputs are not valid (shape, number, etc.). 
""" - y = self._process_input(y, inplace=True, require_vector=True) - mean_value = self._mean(self._t) - alpha = driver.solve( - self._U, self._P, self._d, self._W, y - mean_value + mean_value = self.mean_value + y = self._process_input( + y - mean_value, inplace=True, require_vector=True ) + alpha = driver.solve(self._U, self._P, self._d, self._W, np.copy(y)) if t is None: xs = self._t @@ -326,9 +362,9 @@ def predict( kernel = self.kernel if t is None: - mu = y - self._diag * alpha - if not include_mean: - mu -= mean_value + mu = self._reshape_output(y - self._diag.flatten() * alpha) + if include_mean: + mu += mean_value else: ( @@ -340,13 +376,13 @@ def predict( mu = driver.conditional_mean( self._U, self._V, self._P, alpha, U_star, V_star, inds, mu ) - + mu = self._reshape_output(mu) if include_mean: mu += self._mean(xs) else: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) - mu = np.dot(KxsT.T, alpha) + mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: mu += self._mean(xs) @@ -360,14 +396,19 @@ def predict( var = kernel.get_value(0.0) - np.einsum( "ij,ij->j", KxsT, self.apply_inverse(KxsT, inplace=False) ) - return mu, var + return mu, self._reshape_output(var) # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= np.tensordot( KxsT, self.apply_inverse(KxsT, inplace=False), axes=(0, 0) ) - return mu, cov + if len(self._shape) == 1: + return mu, cov + + m = len(xs) + M = self._shape[1] + return mu, np.reshape(cov, (m, M, m, M)) def sample(self, *, size=None, include_mean=True): """Generate random samples from the prior implied by the GP system @@ -391,10 +432,10 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(self._size) + n = np.random.randn(*self._shape) else: - n = np.random.randn(self._size, size) - result = self.dot_tril(n, inplace=True).T + n = np.random.randn(*(self._shape + (size,))) + result = np.moveaxis(self.dot_tril(n, inplace=True), -1, 0) if include_mean: result += self._mean(self._t) return result @@ -438,6 +479,16 @@ def sample_conditional( mu, cov = self.predict( y, t, return_cov=True, include_mean=include_mean, kernel=kernel ) + + if len(self._shape) == 2: + mu = mu.flatten() + m = cov.shape[0] * cov.shape[1] + cov = np.reshape(cov, (m, m)) + if regularize is not None: cov[np.diag_indices_from(cov)] += regularize - return np.random.multivariate_normal(mu, cov, size=size) + + samp = np.random.multivariate_normal(mu, cov, size=size) + if len(self._shape) == 1: + return samp + return np.reshape(samp, samp.shape[:-1] + (-1, self._shape[1])) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 5284bdb..18b35ec 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -8,13 +8,18 @@ class KronTerm(Term): - __compat__ = "kronecker" __requires_general_addition__ = True + @property + def dimension(self): + return self.M + def __init__(self, term, *, R): self.term = term self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) self.M = len(self.R) + if self.M < 1: + raise ValueError("At least one 'band' is required") if self.R.ndim != 2 or self.R.shape != (self.M, self.M): raise ValueError( "R must be a square matrix; " @@ -26,13 +31,13 @@ def __len__(self): return len(self.term) * self.M def __add__(self, b): - if b.__compat__ != self.__compat__: - raise TypeError("KronTerms can only be added to other KronTerms") + if self.dimension != b.dimension: + raise 
TypeError("Incompatible term dimensions") return KronTermSum(self, b) def __radd__(self, b): - if b.__compat__ != self.__compat__: - raise TypeError("KronTerms can only be added to other KronTerms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") return KronTermSum(b, self) def __mul__(self, b): @@ -193,6 +198,8 @@ def __init__(self, term, *, alpha): "use a general KronTerm for a full rank model" ) self.M = len(self.alpha) + if self.M < 1: + raise ValueError("At least one 'band' is required") self.R = np.outer(self.alpha, self.alpha) self.alpha2 = self.alpha ** 2 @@ -213,4 +220,6 @@ def _copy_P(self, P0, P): class KronTermSum(TermSumGeneral): - __compat__ = "kronecker" + @property + def dimension(self): + return self.terms[0].M diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 7d99b92..eafeadb 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -31,12 +31,15 @@ class Term: """ - __compat__ = None __requires_general_addition__ = False + @property + def dimension(self): + return 0 + def __add__(self, b): - if self.__compat__ != b.__compat__: - raise TypeError("Incompatible terms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") if ( self.__requires_general_addition__ or b.__requires_general_addition__ @@ -45,8 +48,8 @@ def __add__(self, b): return TermSum(self, b) def __mul__(self, b): - if self.__compat__ != b.__compat__: - raise TypeError("Incompatible terms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") return TermProduct(self, b) def __len__(self): diff --git a/python/test/test_celerite2.py b/python/test/test_celerite2.py index c41f26a..4b130d9 100644 --- a/python/test/test_celerite2.py +++ b/python/test/test_celerite2.py @@ -73,6 +73,9 @@ def test_consistency(oterm, mean, data): dict(return_cov=False, return_var=True), dict(return_cov=True, return_var=False), ]: + print(args) + print(original_gp.predict(y, **args)) + print(gp.predict(y, **args)) assert all( np.allclose(a, b) for a, b in zip( From 256ba4d8246302c35070db17ed0b76148daa77c6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 14:53:02 -0400 Subject: [PATCH 13/29] adding tests for predict and sample --- docs/.gitignore | 1 + python/celerite2/celerite2.py | 65 ++++++++++++++++--------- python/test/test_kron.py | 90 ++++++++++++++++++++++++++++++++--- 3 files changed, 126 insertions(+), 30 deletions(-) diff --git a/docs/.gitignore b/docs/.gitignore index 4bbe113..e727465 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1,3 @@ _build c++ +tutorials/*.png diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 4b86bf7..7735614 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -223,6 +223,9 @@ def _reshape_output(self, y): return y return np.reshape(y, (-1, self._shape[1]) + y.shape[1:]) + def _apply_inverse(self, y): + return driver.solve(self._U, self._P, self._d, self._W, y) + def apply_inverse(self, y, *, inplace=False): """Apply the inverse of the covariance matrix to a vector or matrix @@ -243,9 +246,7 @@ def apply_inverse(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). 
""" y = self._process_input(y, inplace=inplace) - return self._reshape_output( - driver.solve(self._U, self._P, self._d, self._W, y) - ) + return self._reshape_output(self._apply_inverse(y)) def dot_tril(self, y, *, inplace=False): """Dot the Cholesky factor of the GP system into a vector or matrix @@ -358,6 +359,7 @@ def predict( raise ValueError("dimension mismatch") KxsT = None + mu = None if kernel is None: kernel = self.kernel @@ -365,22 +367,36 @@ def predict( mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value - else: - - ( - U_star, - V_star, - inds, - ) = self.kernel.get_conditional_mean_matrices(self._t, xs) - mu = np.empty_like(xs) - mu = driver.conditional_mean( - self._U, self._V, self._P, alpha, U_star, V_star, inds, mu - ) - mu = self._reshape_output(mu) - if include_mean: - mu += self._mean(xs) + elif np.all(np.diff(xs > 0)): + + try: + ( + U_star, + V_star, + inds, + ) = self.kernel.get_conditional_mean_matrices(self._t, xs) + + except NotImplementedError: + # We'll fall back on the slow method below + pass + + else: + mu = np.empty_like(xs) + mu = driver.conditional_mean( + self._U, + self._V, + self._P, + alpha, + U_star, + V_star, + inds, + mu, + ) + mu = self._reshape_output(mu) + if include_mean: + mu += self._mean(xs) - else: + if mu is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: @@ -393,15 +409,15 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = kernel.get_value(0.0) - np.einsum( - "ij,ij->j", KxsT, self.apply_inverse(KxsT, inplace=False) + var = np.diag(kernel.get_value(0.0)) - self._reshape_output( + np.einsum("ij,ij->j", KxsT, self._apply_inverse(np.copy(KxsT))) ) - return mu, self._reshape_output(var) + return mu, var # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= np.tensordot( - KxsT, self.apply_inverse(KxsT, inplace=False), axes=(0, 0) + KxsT, self._apply_inverse(np.copy(KxsT)), axes=(0, 0) ) if len(self._shape) == 1: return mu, cov @@ -435,7 +451,10 @@ def sample(self, *, size=None, include_mean=True): n = np.random.randn(*self._shape) else: n = np.random.randn(*(self._shape + (size,))) - result = np.moveaxis(self.dot_tril(n, inplace=True), -1, 0) + + result = self.dot_tril(n, inplace=True) + if size is not None: + result = np.moveaxis(result, -1, 0) if include_mean: result += self._mean(self._t) return result diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 303eb18..04ea533 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -1,17 +1,90 @@ # -*- coding: utf-8 -*- import numpy as np -from celerite2 import kron, terms +from celerite2 import kron, terms, GaussianProcess -def check_value(term, x, diag, y): - tau = x - x[0] - K = term.get_value(tau[:, None] - tau[None, :]) +def check_value(term, x, diag, y, t): + N, M = diag.shape + + K = term.get_value(x[:, None] - x[None, :]) + + try: + K0 = term.term.get_value(x[:, None] - x[None, :]) + except AttributeError: + pass + else: + assert np.allclose(np.kron(K0, term.R), K) + K[np.diag_indices_from(K)] += diag.flatten() K0 = term.dot(x, diag, np.eye(diag.size)) assert np.allclose(K, K0) assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + gp = GaussianProcess(term, t=x, diag=diag) + + # "log_likelihood" method + yval = y[:, 0].reshape((N, M)) + alpha = np.linalg.solve(K, y[:, 0]) + loglike = -0.5 * ( + np.dot(y[:, 0], alpha) + + np.linalg.slogdet(K)[1] 
+ + len(K) * np.log(2 * np.pi) + ) + assert np.allclose(loglike, gp.log_likelihood(yval)) + + # Predict + K0 = K - np.diag(diag.flatten()) + mu0 = np.dot(K0, alpha) + cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + + mu, var = gp.predict(yval, return_var=True) + _, cov = gp.predict(yval, return_cov=True) + assert np.allclose(mu, mu0.reshape((N, M))) + assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert np.allclose(cov, cov0.reshape((N, M, N, M))) + + mu1, var1 = gp.predict(yval, t=x, return_var=True) + _, cov1 = gp.predict(yval, t=x, return_cov=True) + assert np.allclose(mu, mu1) + assert np.allclose(var, var1) + assert np.allclose(cov, cov1) + + K0 = term.get_value(t[:, None] - x[None, :]) + mu0 = np.dot(K0, alpha) + cov0 = term.get_value(t[:, None] - t[None, :]) - np.dot( + K0, np.linalg.solve(K, K0.T) + ) + mu, var = gp.predict(yval, t=t, return_var=True) + _, cov = gp.predict(yval, t=t, return_cov=True) + assert np.allclose(mu, mu0.reshape((len(t), M))) + assert np.allclose(var, np.diag(cov0).reshape((len(t), M))) + assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) + + # "sample" method + seed = 5938 + np.random.seed(seed) + a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K))) + np.random.seed(seed) + b = gp.sample() + assert np.allclose(a.reshape((N, M)), b) + + np.random.seed(seed) + a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K), 10)) + np.random.seed(seed) + b = gp.sample(size=10) + assert np.allclose( + np.ascontiguousarray(np.moveaxis(a, -1, 0)).reshape((10, N, M)), b + ) + + # "sample_conditional" method, numerics make this one a little unstable; + # just check the shape + b = gp.sample_conditional(yval, t=t) + assert b.shape == (len(t), M) + + b = gp.sample_conditional(yval, t=t, size=10) + assert b.shape == (10, len(t), M) + def test_value(): N = 100 @@ -19,6 +92,7 @@ def test_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -36,7 +110,7 @@ def test_value(): assert V.shape == (N * M, 2 * M) assert P.shape == (N * M - 1, 2 * M) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) def test_low_rank_value(): @@ -45,6 +119,7 @@ def test_low_rank_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -58,7 +133,7 @@ def test_low_rank_value(): assert V.shape == (N * M, 2) assert P.shape == (N * M - 1, 2) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) @@ -70,6 +145,7 @@ def test_sum_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -88,4 +164,4 @@ def test_sum_value(): assert V.shape == (N * M, 2 * M + 2) assert P.shape == (N * M - 1, 2 * M + 2) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) From 83540264f7a8789b78f80d6fe3af73d550bbf8a2 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 15:07:20 -0400 Subject: [PATCH 14/29] fixing isort --- python/test/test_kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 04ea533..15e6a1f 100644 --- a/python/test/test_kron.py 
+++ b/python/test/test_kron.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np -from celerite2 import kron, terms, GaussianProcess +from celerite2 import GaussianProcess, kron, terms def check_value(term, x, diag, y, t): From cbca53018e40a925b3a25f65143760312d11a9ab Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 15:14:06 -0400 Subject: [PATCH 15/29] fixing variance dimensions --- docs/tutorials/first.py | 7 +++++-- python/celerite2/celerite2.py | 4 +++- python/test/theano/test_celerite2.py | 27 --------------------------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/docs/tutorials/first.py b/docs/tutorials/first.py index 9f23710..82ad05c 100644 --- a/docs/tutorials/first.py +++ b/docs/tutorials/first.py @@ -6,7 +6,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.5.2 +# jupytext_version: 1.6.0 # kernelspec: # display_name: Python 3 # language: python @@ -36,7 +36,10 @@ np.random.seed(42) t = np.sort( - np.append(np.random.uniform(0, 3.8, 57), np.random.uniform(5.5, 10, 68),) + np.append( + np.random.uniform(0, 3.8, 57), + np.random.uniform(5.5, 10, 68), + ) ) # The input coordinates must be sorted yerr = np.random.uniform(0.08, 0.22, len(t)) y = ( diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 7735614..357dcba 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -409,7 +409,9 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = np.diag(kernel.get_value(0.0)) - self._reshape_output( + var = np.squeeze( + np.diag(kernel.get_value(0.0)) + ) - self._reshape_output( np.einsum("ij,ij->j", KxsT, self._apply_inverse(np.copy(KxsT))) ) return mu, var diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index a9b9ddb..7ef1aed 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -55,33 +55,6 @@ def test_consistency(name, args, mean): check_gp_models(lambda x: x.eval(), gp, pygp, y, t) - # # "log_likelihood" method - # assert np.allclose(pygp.log_likelihood(y), gp.log_likelihood(y).eval()) - - # # "predict" method - # for args in [ - # dict(return_cov=False, return_var=False), - # dict(return_cov=False, return_var=True), - # dict(return_cov=True, return_var=False), - # ]: - # assert all( - # np.allclose(a, b) - # for a, b in zip( - # pygp.predict(y, **args), - # theano.function([], gp.predict(y, **args))(), - # ) - # ) - # assert all( - # np.allclose(a, b) - # for a, b in zip( - # pygp.predict(y, t=t, **args), - # theano.function([], gp.predict(y, t=t, **args))(), - # ) - # ) - - # # "dot_tril" method - # assert np.allclose(pygp.dot_tril(y), gp.dot_tril(y).eval()) - def test_errors(): # Generate fake data From 1d38e1c8add5eff33dad20402950e8ec8367ed7f Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 16:04:13 -0400 Subject: [PATCH 16/29] typo --- python/celerite2/celerite2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 357dcba..8675460 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -367,7 +367,7 @@ def predict( mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value - elif np.all(np.diff(xs > 0)): + elif np.all(np.diff(xs) > 0): try: ( From 5cb6d3ad53c8c6d1ac9aba19254a5ccddbe51641 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 16:02:22 
-0400 Subject: [PATCH 17/29] starting to implement theano terms --- python/celerite2/__init__.py | 4 +- python/celerite2/theano/__init__.py | 4 +- python/celerite2/theano/kron.py | 141 ++++++++++++++++++++++++++++ python/celerite2/theano/terms.py | 110 +++++++++++++++++++--- python/test/theano/test_kron.py | 61 ++++++++++++ python/test/theano/test_terms.py | 1 - 6 files changed, 302 insertions(+), 19 deletions(-) create mode 100644 python/celerite2/theano/kron.py create mode 100644 python/test/theano/test_kron.py diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index d015d8e..d9e6d14 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- -__all__ = ["__version__", "terms", "GaussianProcess"] +__all__ = ["__version__", "terms", "kron", "GaussianProcess"] -from . import terms +from . import terms, kron from .celerite2 import GaussianProcess from .celerite2_version import __version__ diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py index 19b104a..905f38c 100644 --- a/python/celerite2/theano/__init__.py +++ b/python/celerite2/theano/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -__all__ = ["terms", "GaussianProcess", "CeleriteNormal"] +__all__ = ["terms", "kron", "GaussianProcess", "CeleriteNormal"] -from . import terms +from . import terms, kron from .celerite2 import GaussianProcess from .distribution import CeleriteNormal diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py new file mode 100644 index 0000000..cc9d54a --- /dev/null +++ b/python/celerite2/theano/kron.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] + +import theano.tensor as tt +from theano.tensor.slinalg import kron + +from .terms import Term, TermSumGeneral +from .. 
import kron as base_kron + + +class KronTerm(Term): + __requires_general_addition__ = True + __doc__ = base_kron.KronTerm.__doc__ + + @property + def dimension(self): + return self.M + + def __init__(self, term, *, R): + self.term = term + self.R = tt.as_tensor_variable(R) + self.M = self.R.shape[0] + if self.R.ndim != 2: + raise ValueError( + "R must be a square matrix; " + "use a LowRankKronTerm for the low rank model" + ) + self.alpha2 = tt.diag(self.R) + + def __add__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") + return KronTermSum(self, b) + + def __radd__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") + return KronTermSum(b, self) + + def __mul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def __rmul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def get_coefficients(self): + raise ValueError( + "KronTerm objects don't provide coefficients and they can't be " + "used in operations with other celerite2 terms" + ) + + def get_value(self, tau): + return kron(self.term.get_value(tau), self.R) + + def to_dense(self, x, diag): + K = self.get_value(x[:, None] - x[None, :]) + K += tt.diag(tt.reshape(diag, (-1,))) + return K + + def get_psd(self, omega): + return self.term.get_psd(omega) + + def get_celerite_matrices(self, x, diag): + # Check the input dimensions + x = tt.as_tensor_variable(x) + diag = tt.as_tensor_variable(diag) + if diag.ndim != 2: + raise ValueError("'diag' must have the shape (N, M)") + + # Compute the coefficients and kernel for the celerite subproblem + ar, cr, ac, _, cc, _ = self.term.get_coefficients() + _, U_sub, V_sub, _ = self.term.get_celerite_matrices( + x, tt.zeros_like(x) + ) + + # Expand the times appropriately + x_full = tt.reshape(tt.tile(x[:, None], (1, self.M)), (-1,)) + dx = x_full[1:] - x_full[:-1] + + a = tt.reshape( + diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) + ) + U = kron(U_sub, self._get_U_kron()) + V = kron(V_sub, self._get_V_kron()) + + c = tt.concatenate((cr, cc, cc)) + P0 = tt.exp(-c[None, :] * dx[:, None]) + P = self._copy_P(P0) + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + raise NotImplementedError( + "Conditional mean matrices have not (yet!) 
been implemented" + ) + + # The following should be implemented by subclasses + def _get_U_kron(self): + return self.R + + def _get_V_kron(self): + return tt.eye(self.M) + + def _copy_P(self, P0): + return tt.tile(P0[:, :, None], (1, 1, self.M)).reshape( + (P0.shape[0], -1) + ) + + +class LowRankKronTerm(KronTerm): + def __init__(self, term, *, alpha): + self.term = term + self.alpha = tt.as_tensor_variable(alpha) + if self.alpha.ndim != 1: + raise ValueError( + "alpha must be a vector; " + "use a general KronTerm for a full rank model" + ) + self.M = self.alpha.shape[0] + self.R = self.alpha[None, :] * self.alpha[:, None] + self.alpha2 = self.alpha ** 2 + + def _get_U_kron(self): + return self.alpha[:, None] + + def _get_V_kron(self): + return self.alpha[:, None] + + def _copy_P(self, P0): + return P0 + + +class KronTermSum(TermSumGeneral): + @property + def dimension(self): + return self.terms[0].M diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 67336be..f49b95f 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -3,6 +3,7 @@ __all__ = [ "Term", "TermSum", + "TermSumGeneral", "TermProduct", "TermDiff", "TermConvolution", @@ -28,11 +29,23 @@ def __init__(self, *, dtype="float64"): self.dtype = dtype self.coefficients = self.get_coefficients() + def __len__(self): + raise TypeError("len is not implemented for Theano terms") + def __add__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") dtype = theano.scalar.upcast(self.dtype, b.dtype) + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(self, b, dtype=dtype) return TermSum(self, b, dtype=dtype) def __mul__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") dtype = theano.scalar.upcast(self.dtype, b.dtype) return TermProduct(self, b, dtype=dtype) @@ -127,11 +140,10 @@ class TermSum(Term): __doc__ = base_terms.TermSum.__doc__ def __init__(self, *terms, **kwargs): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms super().__init__(**kwargs) @@ -145,17 +157,87 @@ def get_coefficients(self): return tuple(tt.concatenate(a, axis=0) for a in zip(*coeffs)) +class TermSumGeneral(Term): + __doc__ = base_terms.TermSumGeneral.__doc__ + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + # def __len__(self): + # return sum(map(len, self.terms)) + + def get_value(self, tau): + K = self.terms[0].get_value(tau) + for term in self.terms[1:]: + K += term.get_value(tau) + return K + + def get_psd(self, omega): + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices(self, x, diag): + x = tt.as_tensor_variable(x) + diag = 
tt.as_tensor_variable(diag) + zeros = tt.zeros_like(diag) + + a = [] + U = [] + V = [] + P = [] + for term in self.terms: + args = term.get_celerite_matrices(x, zeros) + a.append(args[0]) + U.append(args[1]) + V.append(args[2]) + P.append(args[3]) + + a = tt.reshape(diag, -1) + tt.sum(a, axis=0) + U = tt.concatenate(U, axis=1) + V = tt.concatenate(V, axis=1) + P = tt.concatenate(P, axis=1) + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + Us = [] + Vs = [] + for term in self.terms: + U, V, inds = term.get_conditional_mean_matrices(x, t) + Us.append(U) + Vs.append(V) + return tt.concatenate(Us, axis=1), tt.concatenate(Vs, axis=1), inds + + class TermProduct(Term): __doc__ = base_terms.TermProduct.__doc__ def __init__(self, term1, term2, **kwargs): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -230,11 +312,10 @@ class TermDiff(Term): __doc__ = base_terms.TermDiff.__doc__ def __init__(self, term, **kwargs): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term super().__init__(**kwargs) @@ -254,6 +335,7 @@ def get_coefficients(self): class TermConvolution(Term): + __requires_general_addition__ = True __doc__ = base_terms.TermConvolution.__doc__ def __init__(self, term, delta, **kwargs): diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py new file mode 100644 index 0000000..29d35a3 --- /dev/null +++ b/python/test/theano/test_kron.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +import numpy as np +import theano +import theano.tensor as tt + +from celerite2.theano import terms, kron + + +def test_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2 * M) + assert V.eval().shape == (N * M, 2 * M) + assert P.eval().shape == (N * M - 1, 2 * M) + + # check_value(term, x, diag, y, t) + + +def test_low_rank_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2) + assert V.eval().shape == (N * M, 2) + 
assert P.eval().shape == (N * M - 1, 2) + + # check_value(term, x, diag, y, t) + + full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) + assert np.allclose( + full_term.dot(x, diag, y).eval(), term.dot(x, diag, y).eval() + ) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index ebe4f50..1fbcb8a 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -24,7 +24,6 @@ compare_terms = partial(check_tensor_term, lambda x: x.eval()) -@pytest.mark.xfail(reason="TermSumGeneral not yet implemented") def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: From b26bbf2237a223202a87ec9e306fa35e87ef3102 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:03:11 -0400 Subject: [PATCH 18/29] updating theano gp for kron --- python/celerite2/celerite2.py | 1 + python/celerite2/ext.py | 87 ++++++++++++++++++-------- python/celerite2/theano/celerite2.py | 14 +++-- python/celerite2/theano/kron.py | 4 -- python/celerite2/theano/terms.py | 2 +- python/test/theano/test_kron.py | 92 +++++++++++++++++++++++++++- 6 files changed, 163 insertions(+), 37 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 8675460..b8474de 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -39,6 +39,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._mean_value = None self._diag = None self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 45a2b84..790e340 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -129,6 +129,8 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf @@ -138,7 +140,13 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): raise NotImplementedError("must be implemented by subclasses") - def zeros_like(self, tensor): + def zeros(self, shape): + raise NotImplementedError("must be implemented by subclasses") + + def reshape(self, y, shape): + raise NotImplementedError("must be implemented by subclasses") + + def diag_squeeze(self, y): raise NotImplementedError("must be implemented by subclasses") def do_compute(self, quiet): @@ -177,7 +185,12 @@ def compute( # Save the diagonal self._t = self.as_tensor(t) self._mean_value = self._mean(self._t) - self._diag = self.zeros_like(self._t) + if self.kernel.dimension == 0: + self._shape = (self._t.size,) + else: + self._shape = (self._t.size, self.kernel.dimension) + self._diag = self.zeros(self._shape) + self._size = self._diag.size if yerr is not None: if diag is not None: raise ValueError( @@ -202,21 +215,30 @@ def _process_input(self, y, *, require_vector=False): if self._t is None: raise RuntimeError("you must call 'compute' first") y = self.as_tensor(y) + if self.kernel.dimension != 0: + y = self.reshape( + y, (self._size,) + tuple(y.shape[2:]), ndim=y.ndim - 1 + ) if require_vector and y.ndim != 1: raise ValueError("'y' must be one dimensional") return y + def _reshape_output(self, y): + if self.kernel.dimension == 0: + return y + return self.reshape(y, (-1, self._shape[1]) + tuple(y.shape[1:])) + def apply_inverse(self, y): y = self._process_input(y) - return self.do_solve(y) + return self._reshape_output(self.do_solve(y)) def dot_tril(self, y): y = 
self._process_input(y) - return self.do_dot_tril(y) + return self._reshape_output(self.do_dot_tril(y)) def log_likelihood(self, y): - y = self._process_input(y, require_vector=True) - return self._norm - 0.5 * self.do_norm(y - self._mean_value) + y = self._process_input(y - self.mean_value, require_vector=True) + return self._norm - 0.5 * self.do_norm(y) def predict( self, @@ -229,9 +251,9 @@ def predict( kernel=None, _fast_mean=True, ): - y = self._process_input(y, require_vector=True) - resid = y - self._mean_value - alpha = self.do_solve(resid) + mean_value = self.mean_value + y = self._process_input(y - mean_value, require_vector=True) + alpha = self.do_solve(y) if t is None: xs = self._t @@ -248,26 +270,39 @@ def predict( if t is None: if include_mean: - mu = y - self._diag * alpha + mu = ( + self._reshape_output( + y - self.reshape(self._diag, (-1,)) * alpha + ) + + mean_value + ) else: - mu = resid - self._diag * alpha + mu = self._reshape_output( + y - self.reshape(self._diag, (-1,)) * alpha + ) elif _fast_mean: - ( - U_star, - V_star, - inds, - ) = self.kernel.get_conditional_mean_matrices(self._t, xs) - mu = self.do_conditional_mean(alpha, U_star, V_star, inds) + try: + ( + U_star, + V_star, + inds, + ) = self.kernel.get_conditional_mean_matrices(self._t, xs) + except NotImplementedError: + pass + else: + mu = self._reshape_output( + self.do_conditional_mean(alpha, U_star, V_star, inds) + ) - if include_mean: - mu += self._mean(xs) + if include_mean: + mu += self._mean(xs) if mu is None: if kernel is None: kernel = self.kernel KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) - mu = self.tensordot(KxsT, alpha) + mu = self._reshape_output(self.tensordot(KxsT, alpha)) if include_mean: mu += self._mean(xs) @@ -278,15 +313,19 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = kernel.get_value(0.0) - self.diagdot( - KxsT, self.do_solve(KxsT) - ) + var = self.diag_squeeze( + kernel.get_value(self.zeros((1, 1))) + ) - self._reshape_output(self.diagdot(KxsT, self.do_solve(KxsT))) return mu, var # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= self.tensordot(KxsT, self.do_solve(KxsT)) - return mu, cov + + if self.kernel.dimension == 0: + return mu, cov + + return mu, self.reshape(cov, tuple(mu.shape) + tuple(mu.shape)) def sample(self, *args, **kwargs): raise NotImplementedError("'sample' is not implemented by extensions") diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index 5fd8a5f..b3a6bc5 100644 --- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -51,8 +51,14 @@ class GaussianProcess(BaseGaussianProcess): def as_tensor(self, tensor): return tt.as_tensor_variable(tensor).astype("float64") - def zeros_like(self, tensor): - return tt.zeros_like(tensor) + def zeros(self, shape): + return tt.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return tt.reshape(y, shape, ndim=ndim) + + def diag_squeeze(self, y): + return tt.squeeze(tt.diag(y)) def do_compute(self, quiet): if quiet: @@ -69,9 +75,7 @@ def do_compute(self, quiet): ) self._log_det = tt.sum(tt.log(self._d)) - self._norm = -0.5 * ( - self._log_det + self._t.shape[0] * np.log(2 * np.pi) - ) + self._norm = -0.5 * (self._log_det + self._size * np.log(2 * np.pi)) def check_sorted(self, t): return tt.opt.Assert()(t, tt.all(t[1:] - t[:-1] >= 0)) diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index cc9d54a..c5c6d9c 100644 
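The `get_celerite_matrices` implementations above (both the NumPy and the Theano versions) lean on the Kronecker mixed-product rule: because `kron(A, B) @ kron(C, D) == kron(A @ C, B @ D)`, expanding the one-dimensional low-rank factors as `U -> kron(U_sub, R)` and `V -> kron(V_sub, I_M)` (or both with `alpha[:, None]` in the low-rank term) reproduces the Kronecker kernel `kron(K_time, R)` while keeping the semiseparable structure that the celerite solver needs. A minimal numerical check of that identity (a sketch, not part of the patches; the shapes are arbitrary):

    import numpy as np

    np.random.seed(42)
    N, J, M = 6, 2, 3
    U_sub = np.random.randn(N, J)   # low-rank factors of the 1D celerite kernel
    V_sub = np.random.randn(N, J)
    R = np.random.randn(M, M)
    R = R @ R.T                     # any symmetric output-to-output covariance

    lhs = np.kron(U_sub, R) @ np.kron(V_sub, np.eye(M)).T
    rhs = np.kron(U_sub @ V_sub.T, R)
    assert np.allclose(lhs, rhs)

Only the triangular parts of these products enter the actual factorization, and the exponential decay factors do not depend on the output index, which is why the columns of `P0` are simply repeated for the extra columns of `P`.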
--- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -29,13 +29,9 @@ def __init__(self, term, *, R): self.alpha2 = tt.diag(self.R) def __add__(self, b): - if self.dimension != b.dimension: - raise TypeError("Incompatible term dimensions") return KronTermSum(self, b) def __radd__(self, b): - if self.dimension != b.dimension: - raise TypeError("Incompatible term dimensions") return KronTermSum(b, self) def __mul__(self, b): diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index f49b95f..4833b1e 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -210,7 +210,7 @@ def get_celerite_matrices(self, x, diag): V.append(args[2]) P.append(args[3]) - a = tt.reshape(diag, -1) + tt.sum(a, axis=0) + a = tt.reshape(diag, (-1,)) + tt.sum(a, axis=0) U = tt.concatenate(U, axis=1) V = tt.concatenate(V, axis=1) P = tt.concatenate(P, axis=1) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 29d35a3..e851973 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -3,7 +3,65 @@ import theano import theano.tensor as tt -from celerite2.theano import terms, kron +from celerite2.theano import terms, kron, GaussianProcess + + +def check_value(term, x, diag, y, t): + N, M = diag.shape + + K = term.get_value(x[:, None] - x[None, :]).eval() + + try: + K0 = term.term.get_value(x[:, None] - x[None, :]).eval() + except AttributeError: + pass + else: + assert np.allclose(np.kron(K0, term.R.eval()), K) + + K += np.diag(diag.flatten()) + K0 = term.dot(x, diag, np.eye(diag.size)).eval() + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y).eval()) + + gp = GaussianProcess(term, t=x, diag=diag) + + # "log_likelihood" method + yval = y[:, 0].reshape((N, M)) + alpha = np.linalg.solve(K, y[:, 0]) + loglike = -0.5 * ( + np.dot(y[:, 0], alpha) + + np.linalg.slogdet(K)[1] + + len(K) * np.log(2 * np.pi) + ) + assert np.allclose(loglike, gp.log_likelihood(yval).eval()) + + # Predict + K0 = K - np.diag(diag.flatten()) + mu0 = np.dot(K0, alpha) + cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + + mu, var = theano.function([], gp.predict(yval, return_var=True))() + _, cov = theano.function([], gp.predict(yval, return_cov=True))() + assert np.allclose(mu, mu0.reshape((N, M))) + assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert np.allclose(cov, cov0.reshape((N, M, N, M))) + + mu1, var1 = theano.function([], gp.predict(yval, t=x, return_var=True))() + _, cov1 = theano.function([], gp.predict(yval, t=x, return_cov=True))() + assert np.allclose(mu, mu1) + assert np.allclose(var, var1) + assert np.allclose(cov, cov1) + + K0 = term.get_value(t[:, None] - x[None, :]).eval() + mu0 = np.dot(K0, alpha) + cov0 = term.get_value(t[:, None] - t[None, :]).eval() - np.dot( + K0, np.linalg.solve(K, K0.T) + ) + mu, var = theano.function([], gp.predict(yval, t=t, return_var=True))() + _, cov = theano.function([], gp.predict(yval, t=t, return_cov=True))() + assert np.allclose(mu, mu0.reshape((len(t), M))) + assert np.allclose(var, np.diag(cov0).reshape((len(t), M))) + assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) def test_value(): @@ -30,7 +88,7 @@ def test_value(): assert V.eval().shape == (N * M, 2 * M) assert P.eval().shape == (N * M - 1, 2 * M) - # check_value(term, x, diag, y, t) + check_value(term, x, diag, y, t) def test_low_rank_value(): @@ -53,9 +111,37 @@ def test_low_rank_value(): assert V.eval().shape == (N * M, 2) assert 
P.eval().shape == (N * M - 1, 2) - # check_value(term, x, diag, y, t) + check_value(term, x, diag, y, t) full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose( full_term.dot(x, diag, y).eval(), term.dot(x, diag, y).eval() ) + + +def test_sum_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2 * M + 2) + assert V.eval().shape == (N * M, 2 * M + 2) + assert P.eval().shape == (N * M - 1, 2 * M + 2) + + check_value(term, x, diag, y, t) From bb88ce28f26e9afaa1d9278c45822ff129522dab Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:05:33 -0400 Subject: [PATCH 19/29] updating jax an torch --- python/celerite2/jax/celerite2.py | 12 +++++++++--- python/celerite2/torch/celerite2.py | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/python/celerite2/jax/celerite2.py b/python/celerite2/jax/celerite2.py index 1dbbe54..d5aff69 100644 --- a/python/celerite2/jax/celerite2.py +++ b/python/celerite2/jax/celerite2.py @@ -12,8 +12,14 @@ class GaussianProcess(BaseGaussianProcess): def as_tensor(self, tensor): return np.asarray(tensor, dtype=np.float64) - def zeros_like(self, tensor): - return np.zeros_like(tensor) + def zeros(self, shape): + return np.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return np.reshape(y, shape) + + def diag_squeeze(self, y): + return np.squeeze(np.diag(y)) def do_compute(self, quiet): # Compute the Cholesky factorization @@ -29,7 +35,7 @@ def do_compute(self, quiet): else: self._log_det = np.sum(np.log(self._d)) self._norm = -0.5 * ( - self._log_det + len(self._t) * np.log(2 * np.pi) + self._log_det + self._size * np.log(2 * np.pi) ) def check_sorted(self, t): diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index d7b7e85..fb5f2da 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -29,8 +29,14 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): return as_tensor(tensor) - def zeros_like(self, tensor): - return torch.zeros_like(tensor) + def zeros(self, shape): + return torch.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return torch.reshape(y, shape) + + def diag_squeeze(self, y): + return torch.squeeze(torch.diag(y)) def do_compute(self, quiet): # Compute the Cholesky factorization From 1430f46ce3c402a8321a49ed1c0a3e49d4e9b0b8 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:06:09 -0400 Subject: [PATCH 20/29] sorting imports --- python/celerite2/__init__.py | 2 +- python/celerite2/theano/__init__.py | 2 +- python/celerite2/theano/kron.py | 5 ++--- python/test/theano/test_kron.py | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index d9e6d14..e5db30d 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -2,7 +2,7 @@ __all__ = ["__version__", "terms", "kron", 
"GaussianProcess"] -from . import terms, kron +from . import kron, terms from .celerite2 import GaussianProcess from .celerite2_version import __version__ diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py index 905f38c..38b1142 100644 --- a/python/celerite2/theano/__init__.py +++ b/python/celerite2/theano/__init__.py @@ -2,6 +2,6 @@ __all__ = ["terms", "kron", "GaussianProcess", "CeleriteNormal"] -from . import terms, kron +from . import kron, terms from .celerite2 import GaussianProcess from .distribution import CeleriteNormal diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index c5c6d9c..4f3b622 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- __all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] - -import theano.tensor as tt +from theano import tensor as tt from theano.tensor.slinalg import kron -from .terms import Term, TermSumGeneral from .. import kron as base_kron +from .terms import Term, TermSumGeneral class KronTerm(Term): diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index e851973..1c8d3e0 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- import numpy as np import theano -import theano.tensor as tt +from theano import tensor as tt -from celerite2.theano import terms, kron, GaussianProcess +from celerite2.theano import GaussianProcess, kron, terms def check_value(term, x, diag, y, t): From 9a776204d62eb47c6896d544da1866e6ac9cb3c6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 19:25:44 -0400 Subject: [PATCH 21/29] typos --- python/celerite2/ext.py | 10 +++++++--- python/celerite2/jax/terms.py | 4 ++++ python/celerite2/torch/terms.py | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 790e340..0158674 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -313,9 +313,13 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = self.diag_squeeze( - kernel.get_value(self.zeros((1, 1))) - ) - self._reshape_output(self.diagdot(KxsT, self.do_solve(KxsT))) + if self.kernel.dimension == 0: + var0 = kernel.get_value(0.0) + else: + var0 = self.diag_squeeze(kernel.get_value(self.zeros((1, 1)))) + var = var0 - self._reshape_output( + self.diagdot(KxsT, self.do_solve(KxsT)) + ) return mu, var # Predictive covariance diff --git a/python/celerite2/jax/terms.py b/python/celerite2/jax/terms.py index 10d5884..ab887d7 100644 --- a/python/celerite2/jax/terms.py +++ b/python/celerite2/jax/terms.py @@ -27,6 +27,10 @@ def __add__(self, b): def __mul__(self, b): return TermProduct(self, b) + @property + def dimension(self): + return 0 + @property def terms(self): return [self] diff --git a/python/celerite2/torch/terms.py b/python/celerite2/torch/terms.py index 8de426d..a523e73 100644 --- a/python/celerite2/torch/terms.py +++ b/python/celerite2/torch/terms.py @@ -33,6 +33,10 @@ def __add__(self, b): def __mul__(self, b): return TermProduct(self, b) + @property + def dimension(self): + return 0 + @property def terms(self): return [self] From 4d6d3ca1b25515cd6f7d57f2f7738a14d4b6b928 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 19:41:10 -0400 Subject: [PATCH 22/29] torch sizes --- python/celerite2/ext.py | 9 ++++++--- python/celerite2/torch/celerite2.py | 14 ++++++++++++++ 
python/test/theano/test_kron.py | 1 - 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 0158674..93a06b8 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -140,6 +140,9 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): raise NotImplementedError("must be implemented by subclasses") + def get_size(self, tensor): + return tensor.size + def zeros(self, shape): raise NotImplementedError("must be implemented by subclasses") @@ -186,11 +189,11 @@ def compute( self._t = self.as_tensor(t) self._mean_value = self._mean(self._t) if self.kernel.dimension == 0: - self._shape = (self._t.size,) + self._shape = (self.get_size(self._t),) else: - self._shape = (self._t.size, self.kernel.dimension) + self._shape = (self.get_size(self._t), self.kernel.dimension) self._diag = self.zeros(self._shape) - self._size = self._diag.size + self._size = self.get_size(self._diag) if yerr is not None: if diag is not None: raise ValueError( diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index fb5f2da..8b7e805 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -19,16 +19,30 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): # Placeholders for storing data self._t = None + self._mean_value = None self._diag = None + self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf if t is not None: self.compute(t, **kwargs) + @property + def mean_value(self): + if self._mean_value is None: + raise RuntimeError( + "'compute' must be executed before accessing mean_value" + ) + return self._mean_value + def as_tensor(self, tensor): return as_tensor(tensor) + def get_size(self, tensor): + return torch.prod(tensor.size()) + def zeros(self, shape): return torch.zeros(shape) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 1c8d3e0..d8e6f5d 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import numpy as np import theano -from theano import tensor as tt from celerite2.theano import GaussianProcess, kron, terms From dd91eb9724e5e660d818a7ca6628349802614ade Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 20:08:55 -0400 Subject: [PATCH 23/29] torch size again --- python/celerite2/torch/celerite2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index 8b7e805..19c7273 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -41,7 +41,7 @@ def as_tensor(self, tensor): return as_tensor(tensor) def get_size(self, tensor): - return torch.prod(tensor.size()) + return torch.numel(tensor) def zeros(self, shape): return torch.zeros(shape) From badd7292143eb03596f1c1e220408c687f28d802 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 09:09:31 -0400 Subject: [PATCH 24/29] adding tests for citations --- python/celerite2/theano/celerite2.py | 2 + python/celerite2/theano/kron.py | 26 ++++++++++++ python/celerite2/theano/terms.py | 1 + python/test/theano/test_celerite2.py | 63 +++++++++++++++++----------- python/test/theano/test_kron.py | 59 ++++++++++++++------------ setup.py | 2 +- 6 files changed, 100 insertions(+), 53 deletions(-) diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index b3a6bc5..aad68a0 100644 
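The diff that follows threads the kernel's `__citations__` tuple through `GaussianProcess.marginal` and onto the active PyMC3 model. As a usage sketch (not part of the patch; it just mirrors the `test_citations` tests added below):

    import numpy as np
    import pymc3 as pm
    from celerite2.theano import GaussianProcess, kron, terms

    N, M = 50, 3
    x = np.sort(np.random.uniform(0, 10, N))
    diag = np.random.uniform(0.1, 0.5, (N, M))
    y = np.random.randn(N, M)

    with pm.Model() as model:
        term = kron.KronTerm(terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0), R=np.eye(M))
        gp = GaussianProcess(term, t=x, diag=diag)
        gp.marginal("obs", observed=y)

    # After `marginal`, the BibTeX entries are attached to the model:
    # model.__citations__["celerite2"] for the solver itself and, for Kron
    # terms, model.__citations__["celerite2:kernel"] with the Gordon et al.
    # (2020) reference.

This is also why `setup.py` now lists `pymc3` under the `theano` extra.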
--- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -106,6 +106,8 @@ def _add_citations_to_pymc3_model(self, **kwargs): if not hasattr(model, "__citations__"): model.__citations__ = dict() model.__citations__["celerite2"] = CITATIONS + if self.kernel.__citations__: + model.__citations__["celerite2:kernel"] = self.kernel.__citations__ def marginal(self, name, **kwargs): """Add the marginal likelihood to a PyMC3 model diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 4f3b622..a5c6d01 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -7,10 +7,34 @@ from .. import kron as base_kron from .terms import Term, TermSumGeneral +CITATIONS = ( + ("celerite2:gordon20",), + r""" +@article{celerite2:gordon20, + author = {{Gordon}, Tyler and {Agol}, Eric and + {Foreman-Mackey}, Daniel}, + title = "{A Fast, 2D Gaussian Process Method Based on Celerite: + Applications to Transiting Exoplanet Discovery and + Characterization}", + journal = {arXiv e-prints}, + year = 2020, + month = jul, + eid = {arXiv:2007.05799}, + pages = {arXiv:2007.05799}, +archivePrefix = {arXiv}, + eprint = {2007.05799}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200705799G}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} +""", +) + class KronTerm(Term): __requires_general_addition__ = True __doc__ = base_kron.KronTerm.__doc__ + __citations__ = CITATIONS @property def dimension(self): @@ -131,6 +155,8 @@ def _copy_P(self, P0): class KronTermSum(TermSumGeneral): + __citations__ = CITATIONS + @property def dimension(self): return self.terms[0].M diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 4833b1e..90d7a8e 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -24,6 +24,7 @@ class Term(base_terms.Term): __doc__ = base_terms.Term.__doc__ + __citations__ = None def __init__(self, *, dtype="float64"): self.dtype = dtype diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index 7ef1aed..b9caa65 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -1,25 +1,13 @@ # -*- coding: utf-8 -*- import numpy as np +import pymc3 as pm import pytest import celerite2 from celerite2 import terms as pyterms from celerite2.testing import check_gp_models - -try: - import theano # noqa -except ImportError: - HAS_THEANO = False -else: - from celerite2.theano import GaussianProcess, terms - - HAS_THEANO = True - - -pytestmark = pytest.mark.skipif( - not HAS_THEANO, reason="Theano is not installed" -) - +from celerite2.theano import GaussianProcess, terms +from celerite2.theano.celerite2 import CITATIONS term_mark = pytest.mark.parametrize( "name,args", @@ -35,15 +23,21 @@ ) -@term_mark -@pytest.mark.parametrize("mean", [0.0, 10.5]) -def test_consistency(name, args, mean): +@pytest.fixture +def data(): # Generate fake data np.random.seed(40582) x = np.sort(np.random.uniform(0, 10, 50)) t = np.sort(np.random.uniform(-1, 12, 100)) diag = np.random.uniform(0.1, 0.3, len(x)) y = np.sin(x) + return x, diag, y, t + + +@term_mark +@pytest.mark.parametrize("mean", [0.0, 10.5]) +def test_consistency(name, args, mean, data): + x, diag, y, t = data term = getattr(terms, name)(**args) gp = GaussianProcess(term, mean=mean) @@ -56,13 +50,8 @@ def test_consistency(name, args, mean): check_gp_models(lambda x: x.eval(), gp, pygp, y, t) -def test_errors(): - # 
Generate fake data - np.random.seed(40582) - x = np.sort(np.random.uniform(0, 10, 50)) - t = np.sort(np.random.uniform(-1, 12, 100)) - diag = np.random.uniform(0.1, 0.3, len(x)) - y = np.sin(x) +def test_errors(data): + x, diag, y, t = data term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) gp = GaussianProcess(term) @@ -108,3 +97,27 @@ def test_errors(): with pytest.raises(ValueError): gp.predict(y, t=np.tile(t[:, None], (1, 5))) + + +def test_marginal(data): + x, diag, y, t = data + + with pm.Model() as model: + term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y) + + assert np.allclose( + model.fastfn(model.logpt)(model.test_point), + model.fastfn(gp.log_likelihood(y))(model.test_point), + ) + + +def test_citations(data): + x, diag, y, t = data + + with pm.Model() as model: + term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y) + assert model.__citations__["celerite2"] == CITATIONS diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index d8e6f5d..9a98d3e 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,10 +1,26 @@ # -*- coding: utf-8 -*- import numpy as np +import pymc3 as pm +import pytest import theano from celerite2.theano import GaussianProcess, kron, terms +@pytest.fixture +def data(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + return N, M, x, diag, y, t + + def check_value(term, x, diag, y, t): N, M = diag.shape @@ -63,15 +79,8 @@ def check_value(term, x, diag, y, t): assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) -def test_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_value(data): + N, M, x, diag, y, t = data R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -90,15 +99,8 @@ def test_value(): check_value(term, x, diag, y, t) -def test_low_rank_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_low_rank_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) @@ -118,15 +120,8 @@ def test_low_rank_value(): ) -def test_sum_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_sum_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) R = np.random.randn(M, M) @@ -144,3 +139,13 @@ def test_sum_value(): assert P.eval().shape == (N * M - 1, 2 * M + 2) check_value(term, x, diag, y, t) + + +def test_citations(data): + N, M, x, diag, y, t = data + + with pm.Model() as model: + term = kron.KronTerm(terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0), R=np.eye(M)) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y[:, 0].reshape((N, M))) + assert model.__citations__["celerite2:kernel"] == kron.CITATIONS diff --git a/setup.py b/setup.py index 658b8fe..9fbf1c0 100644 --- 
a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ "scipy", "celerite>=0.3.1", ], - "theano": ["theano"], + "theano": ["theano", "pymc3"], "torch": ["torch"], "jax": ["jax", "jaxlib"], "release": ["pep517", "twine"], From 7bd2ce3e0eaaefd2ea566ef32ee5d482ff895a2c Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 10:53:37 -0400 Subject: [PATCH 25/29] adding kron tutorial --- docs/index.rst | 1 + docs/tutorials/first.py | 2 +- docs/tutorials/kron.py | 130 ++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 docs/tutorials/kron.py diff --git a/docs/index.rst b/docs/index.rst index a27a727..461a8da 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -36,6 +36,7 @@ be a good choice. :caption: Tutorials tutorials/first.ipynb + tutorials/kron.ipynb .. toctree:: :maxdepth: 2 diff --git a/docs/tutorials/first.py b/docs/tutorials/first.py index 82ad05c..d186f54 100644 --- a/docs/tutorials/first.py +++ b/docs/tutorials/first.py @@ -6,7 +6,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.6.0 +# jupytext_version: 1.5.2 # kernelspec: # display_name: Python 3 # language: python diff --git a/docs/tutorials/kron.py b/docs/tutorials/kron.py new file mode 100644 index 0000000..02e909c --- /dev/null +++ b/docs/tutorials/kron.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.5.2 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# + nbsphinx="hidden" +# %matplotlib inline + +# + nbsphinx="hidden" +# %run notebook_setup +# - + +# # Multivariate models +# + +# + +import numpy as np +import matplotlib.pyplot as plt + +import celerite2 + +N = 200 +M = 5 +lam = np.linspace(0, 3, M) + +np.random.seed(59302) +t = np.append( + np.sort(np.random.uniform(0, 4, N // 2)), + np.sort(np.random.uniform(6, 10, N - N // 2)), +) +yerr = np.random.uniform(1e-1, 2e-1, (N, M)) + +rho_true = 4.5 +R_true = 0.5 * np.exp(-0.5 * (lam[:, None] - lam[None, :]) ** 2) +kernel = celerite2.kron.KronTerm( + celerite2.terms.SHOTerm(sigma=1.0, rho=rho_true, Q=3.0), R=R_true +) +gp = celerite2.GaussianProcess(kernel, t=t, yerr=yerr) +y = gp.sample() + +plt.yticks([]) +for m in range(M): + plt.axhline(2 * m, color="k", lw=0.5) +plt.plot(t, y + 2 * np.arange(M), ".") +plt.ylim(-2, 2 * M) +plt.xlim(-1, 11) +plt.xlabel("x") +_ = plt.ylabel("y (with offsets)") + +# + +import pymc3 as pm +import pymc3_ext as pmx +import celerite2.theano as cl2 + +with pm.Model() as model: + + rho = pm.Lognormal("rho", mu=np.log(5.0), sigma=5.0) + chol = pm.LKJCholeskyCov( + "chol", + eta=10.0, + n=M, + sd_dist=pm.Exponential.dist(0.01), + compute_corr=True, + )[0] + R = pm.Deterministic("R", pm.math.dot(chol, chol.T)) + + kernel = cl2.kron.KronTerm( + cl2.terms.SHOTerm(sigma=1.0, rho=rho, Q=3.0), R=R + ) + gp = cl2.GaussianProcess(kernel, t=t, yerr=yerr) + gp.marginal("obs", observed=y) + + soln = pmx.optimize() + +# + +t_pred = np.linspace(-1, 11, 1000) +with model: + mu, var = pmx.eval_in_model(gp.predict(y, t=t_pred, return_var=True), soln) + +plt.yticks([]) +for m in range(M): + plt.axhline(2 * m, color="k", lw=0.5) + plt.plot(t, y[:, m] + 2 * m, ".", color=f"C{m}") + plt.fill_between( + t_pred, + mu[:, m] - np.sqrt(var[:, m]) + 2 * m, + mu[:, m] + np.sqrt(var[:, m]) + 2 * m, + color=f"C{m}", + alpha=0.5, + ) + plt.plot(t_pred, mu[:, m] + 2 * m, color=f"C{m}") + +plt.ylim(-2, 
2 * M) +plt.xlim(-1, 11) +plt.xlabel("x") +_ = plt.ylabel("y (with offsets)") +# - + +with model: + trace = pm.sample( + tune=2000, draws=2000, target_accept=0.9, init="adapt_full" + ) + +plt.hist(trace["rho"], 50, histtype="step", color="k") +plt.axvline(rho_true) +plt.yticks([]) +plt.xlabel(r"$\rho$") +plt.ylabel(r"$p(\rho)$") + +for m in range(M): + plt.errorbar( + np.arange(M), + np.mean(trace["R"][:, m, :], axis=0) + m, + yerr=np.std(trace["R"][:, m, :], axis=0), + color=f"C{m}", + ) + plt.plot(np.arange(M), R_true[m] + m, ":", color=f"C{m}") +plt.yticks([]) +plt.xlabel("band index") +_ = plt.ylabel("covariance (with offsets)") diff --git a/setup.py b/setup.py index 9fbf1c0..ef3da7c 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "emcee", "pymc3", "tqdm", + "pymc3-ext", ], } EXTRA_REQUIRE["dev"] = ( From 40ee27a997c54f4a101a0c8be3759423fd108d46 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 11:55:39 -0400 Subject: [PATCH 26/29] dealing with testvals --- python/celerite2/theano/celerite2.py | 8 +------- python/test/theano/conftest.py | 1 + python/test/theano/test_celerite2.py | 9 ++++++--- python/test/theano/test_ops.py | 1 + 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index aad68a0..ee84945 100644 --- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -8,11 +8,6 @@ from . import ops from .distribution import CeleriteNormal -try: - import pymc3 as pm -except ImportError: - pm = None - CITATIONS = ( ("celerite2:foremanmackey17", "celerite2:foremanmackey18"), r""" @@ -99,8 +94,7 @@ def diagdot(self, a, b): return tt.batched_dot(a.T, b.T) def _add_citations_to_pymc3_model(self, **kwargs): - if not pm: - raise ImportError("pymc3 is required for the 'marginal' method") + import pymc3 as pm model = pm.modelcontext(kwargs.get("model", None)) if not hasattr(model, "__citations__"): diff --git a/python/test/theano/conftest.py b/python/test/theano/conftest.py index baeea58..e91bc99 100644 --- a/python/test/theano/conftest.py +++ b/python/test/theano/conftest.py @@ -7,6 +7,7 @@ def pytest_configure(config): import platform theano.config.floatX = "float64" + theano.config.compute_test_value = "raise" if platform.system() == "Darwin": theano.config.gcc.cxxflags = "-Wno-c++11-narrowing" config.addinivalue_line("filterwarnings", "ignore") diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index b9caa65..d1ae383 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import numpy as np -import pymc3 as pm import pytest import celerite2 @@ -61,8 +60,8 @@ def test_errors(data): gp.log_likelihood(y) # Sorted - gp.compute(x[::-1], diag=diag) with pytest.raises(AssertionError): + gp.compute(x[::-1], diag=diag) gp._d.eval() # 1D @@ -74,8 +73,8 @@ def test_errors(data): gp.compute(x, diag=diag, yerr=np.sqrt(diag)) # Not positive definite - gp.compute(x, diag=-10 * diag) with pytest.raises(celerite2.backprop.LinAlgError): + gp.compute(x, diag=-10 * diag) gp._d.eval() # Not positive definite with `quiet` @@ -100,6 +99,8 @@ def test_errors(data): def test_marginal(data): + import pymc3 as pm + x, diag, y, t = data with pm.Model() as model: @@ -114,6 +115,8 @@ def test_marginal(data): def test_citations(data): + import pymc3 as pm + x, diag, y, t = data with pm.Model() as model: diff --git a/python/test/theano/test_ops.py 
b/python/test/theano/test_ops.py index 6b36587..62956c2 100644 --- a/python/test/theano/test_ops.py +++ b/python/test/theano/test_ops.py @@ -34,6 +34,7 @@ def convert_values_to_types(values): types.append(tt.dmatrix()) else: raise ValueError("unknown type") + types[-1].tag.test_value = v return types From e71fb6c1425a16487cff23520673daa33ac850cf Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 15:39:57 -0400 Subject: [PATCH 27/29] adding docstrings for kron terms --- docs/api/python.rst | 13 +++++++++++++ python/celerite2/kron.py | 29 +++++++++++++++++++++++++++++ python/celerite2/theano/kron.py | 3 +++ 3 files changed, 45 insertions(+) diff --git a/docs/api/python.rst b/docs/api/python.rst index 81f9f74..058e6a9 100644 --- a/docs/api/python.rst +++ b/docs/api/python.rst @@ -72,3 +72,16 @@ recommended unless you're confident that you know what you're doing. .. autoclass:: celerite2.terms.RealTerm .. autoclass:: celerite2.terms.ComplexTerm .. autoclass:: celerite2.terms.OriginalCeleriteTerm + +Multivariate models +------------------- + +The original *celerite* algorithm was only defined for one dimensional inputs, +but this was generalized by `Gordon et al. (2020) +`_ to support multivariate inputs on tensor +product grids with separable kernels. In this case, the covariance matrix is +given by a Kronecker product. These models are now available in *celerite2* +using the following terms: + +.. autoclass:: celerite2.kron.KronTerm +.. autoclass:: celerite2.kron.LowRankKronTerm diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 18b35ec..1643b7e 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -8,6 +8,22 @@ class KronTerm(Term): + """A general multivariate celerite term + + This term supports rectangular data with the shape ``N, M`` where ``N`` is + the usual "time" axis or similar, but ``M`` is the dimension of the output + space. The model is that the multivariate data are correlated samples from + an underlying process descirbed by a standard *celerite* kernel. The + covariance between output dimensions is given by the general ``M x M`` + matrix ``R``. More details about this model can be found in `Gordon et al. + (2020) `_. + + Args: + term (celerite2.terms.Term): The celerite term describing the + underlying process. + R (shape[M, M]): The covariance matrix between output dimensions. + """ + __requires_general_addition__ = True @property @@ -187,6 +203,19 @@ def _copy_P(self, P0, P): class LowRankKronTerm(KronTerm): + """A low rank multivariate celerite term + + A low rank version of :class:`celerite2.kron.KronTerm` where the covariance + between outputs is ``R = alpha * alpha^T``, where ``alpha`` is a column + vector of size ``M``. More details about this model can be found in `Gordon + et al. (2020) `_. + + Args: + term (celerite2.terms.Term): The celerite term describing the + underlying process. + alpha (shape[M]): The vector of amplitudes for each output. 
+    """
+
     def __init__(self, term, *, alpha):
         self.term = term
         self.alpha = np.ascontiguousarray(
diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py
index a5c6d01..32f45e9 100644
--- a/python/celerite2/theano/kron.py
+++ b/python/celerite2/theano/kron.py
@@ -132,6 +132,8 @@
 class LowRankKronTerm(KronTerm):
+    __doc__ = base_kron.LowRankKronTerm.__doc__
+
     def __init__(self, term, *, alpha):
         self.term = term
         self.alpha = tt.as_tensor_variable(alpha)
@@ -156,6 +158,7 @@
 class KronTermSum(TermSumGeneral):
     __citations__ = CITATIONS
+    __doc__ = base_kron.KronTermSum.__doc__
 
     @property
     def dimension(self):

From f978884bd2175302c501d599bc9e4ec562101504 Mon Sep 17 00:00:00 2001
From: Dan F-M
Date: Thu, 22 Oct 2020 08:23:01 -0400
Subject: [PATCH 28/29] simplifying kron needs

---
 docs/api/python.rst             |   3 +-
 docs/tutorials/kron.py          |  11 ++++
 python/celerite2/kron.py        | 103 +++++++++-----------------
 python/celerite2/theano/kron.py |  80 ++++++++----------------
 python/test/test_kron.py        |   4 +-
 python/test/theano/test_kron.py |   4 +-
 6 files changed, 74 insertions(+), 131 deletions(-)

diff --git a/docs/api/python.rst b/docs/api/python.rst
index 058e6a9..66452e3 100644
--- a/docs/api/python.rst
+++ b/docs/api/python.rst
@@ -81,7 +81,6 @@ but this was generalized by `Gordon et al. (2020)
 <https://arxiv.org/abs/2007.05799>`_ to support multivariate inputs on tensor
 product grids with separable kernels. In this case, the covariance matrix is
 given by a Kronecker product. These models are now available in *celerite2*
-using the following terms:
+using the following:
 
 .. autoclass:: celerite2.kron.KronTerm
-.. autoclass:: celerite2.kron.LowRankKronTerm
diff --git a/docs/tutorials/kron.py b/docs/tutorials/kron.py
index 02e909c..4a50e9f 100644
--- a/docs/tutorials/kron.py
+++ b/docs/tutorials/kron.py
@@ -22,6 +22,17 @@
 # # Multivariate models
 #
+# The original *celerite* package only supported one-dimensional data (like time series), but [Gordon et al. (2020)](https://arxiv.org/abs/2007.05799) generalized the method to multivariate data on a tensor product grid.
+# This has been implemented in *celerite2* so "rectangular" data are now supported.
+# The main application discussed by [Gordon et al. (2020)](https://arxiv.org/abs/2007.05799) was multiwavelength observations of transiting exoplanets, but it is applicable to many other problems with the following structure:
+#
+# 1. The rectangular data must be fully filled: you must have observations in every band at every time. We've developed a method to handle missing data and that will be included in a future release.
+# 2. The covariance matrix must be separable with the form `k({x, y}_n, {x, y}_m) = k1(x_n, x_m) * k2(y_n, y_m)`, where `x` (a scalar) indexes the "longest" one-dimensional axis of the data (for example, time) and `y` (optionally a vector) indexes the narrower axis of the data (for example, wavelength). To apply *celerite*, we must make the further assumption that `k1(x_n, x_m)` is a standard *celerite* kernel, but no limitations are placed on the form of `k2(y_n, y_m)`.
+#
+# The implementation of this method in *celerite2* comes with two forms for the kernel:
+#
+# 1. `kron.KronTerm`: A general form of the model where `k2(y_n, y_m)` is specified as a full-rank `M x M` matrix called `R`, where `M` is the size of the `y` dimension.
The computational cost of evaluating this model scales as `O(N * J^2 * M^3)` where `N` is the size of the `x` dimension and `J` is the rank of the *celerite* term describing `k1(x_n, x_m)`. +# 2. `kron.LowRankKronTerm`: A more computationally efficient method where # + import numpy as np diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 1643b7e..c7a055d 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] +__all__ = ["KronTerm", "KronTermSum"] import numpy as np @@ -30,21 +30,32 @@ class KronTerm(Term): def dimension(self): return self.M - def __init__(self, term, *, R): + def __init__(self, term, *, R=None, L=None): self.term = term - self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) - self.M = len(self.R) - if self.M < 1: - raise ValueError("At least one 'band' is required") - if self.R.ndim != 2 or self.R.shape != (self.M, self.M): - raise ValueError( - "R must be a square matrix; " - "use a LowRankKronTerm for the low rank model" - ) + + if R is not None: + if L is not None: + raise ValueError("Only one of 'R' and 'L' can be defined") + self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + try: + self.L = np.linalg.cholesky(self.R) + except np.linalg.LinAlgError: + M = np.copy(self.R) + M[np.diag_indices_from(M)] += 1e-10 + self.L = np.linalg.cholesky(M) + elif L is not None: + self.L = np.ascontiguousarray(L, dtype=np.float64) + if self.L.ndim == 1: + self.L = np.reshape(self.L, (-1, 1)) + self.R = np.dot(self.L, self.L.T) + else: + raise ValueError("One of 'R' and 'L' must be defined") + + self.M, self.K = self.L.shape self.alpha2 = np.diag(self.R) def __len__(self): - return len(self.term) * self.M + return len(self.term) * self.K def __add__(self, b): if self.dimension != b.dimension: @@ -140,7 +151,7 @@ def get_celerite_matrices( x, np.zeros_like(x) ) J0 = U_sub.shape[1] - J = self._get_J(J0) + J = self.K * J0 # Allocate memory as requested if a is None: @@ -167,12 +178,13 @@ def get_celerite_matrices( a[:] = ( diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) ).flatten() - U[:] = np.kron(U_sub, self._get_U_kron()) - V[:] = np.kron(V_sub, self._get_V_kron()) + U[:] = np.kron(U_sub, self.L) + V[:] = np.kron(V_sub, self.L) c = np.concatenate((cr, cc, cc)) P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) - self._copy_P(P0, P) + if self.K > 1: + P[:] = np.tile(P0[:, :, None], (1, 1, self.K)).reshape(P.shape) return a, U, V, P @@ -188,65 +200,6 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) been implemented" ) - # The following should be implemented by subclasses - def _get_J(self, J0): - return self.M * J0 - - def _get_U_kron(self): - return self.R - - def _get_V_kron(self): - return np.eye(self.M) - - def _copy_P(self, P0, P): - P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape(P.shape) - - -class LowRankKronTerm(KronTerm): - """A low rank multivariate celerite term - - A low rank version of :class:`celerite2.kron.KronTerm` where the covariance - between outputs is ``R = alpha * alpha^T``, where ``alpha`` is a column - vector of size ``M``. More details about this model can be found in `Gordon - et al. (2020) `_. - - Args: - term (celerite2.terms.Term): The celerite term describing the - underlying process. - alpha (shape[M]): The vector of amplitudes for each output. 
- """ - - def __init__(self, term, *, alpha): - self.term = term - self.alpha = np.ascontiguousarray( - np.atleast_1d(alpha), dtype=np.float64 - ) - if self.alpha.ndim != 1: - raise ValueError( - "alpha must be a vector; " - "use a general KronTerm for a full rank model" - ) - self.M = len(self.alpha) - if self.M < 1: - raise ValueError("At least one 'band' is required") - self.R = np.outer(self.alpha, self.alpha) - self.alpha2 = self.alpha ** 2 - - def __len__(self): - return len(self.term) - - def _get_J(self, J0): - return J0 - - def _get_U_kron(self): - return self.alpha[:, None] - - def _get_V_kron(self): - return self.alpha[:, None] - - def _copy_P(self, P0, P): - pass - class KronTermSum(TermSumGeneral): @property diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 32f45e9..9dd22b4 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] +__all__ = ["KronTerm", "KronTermSum"] from theano import tensor as tt -from theano.tensor.slinalg import kron +from theano.tensor.slinalg import Cholesky, kron from .. import kron as base_kron from .terms import Term, TermSumGeneral @@ -31,6 +31,9 @@ ) +cholesky = Cholesky(lower=True) + + class KronTerm(Term): __requires_general_addition__ = True __doc__ = base_kron.KronTerm.__doc__ @@ -40,15 +43,29 @@ class KronTerm(Term): def dimension(self): return self.M - def __init__(self, term, *, R): + def __init__(self, term, *, R=None, L=None): self.term = term - self.R = tt.as_tensor_variable(R) - self.M = self.R.shape[0] - if self.R.ndim != 2: - raise ValueError( - "R must be a square matrix; " - "use a LowRankKronTerm for the low rank model" - ) + + if R is not None: + if L is not None: + raise ValueError("Only one of 'R' and 'L' can be defined") + self.R = tt.as_tensor_variable(R) + self.L = cholesky(self.R + 1e-10 * tt.eye(R.shape[0])) + self.M = self.R.shape[0] + self.K = self.R.shape[0] + + elif L is not None: + self.L = tt.as_tensor_variable(L) + self.M = self.L.shape[0] + if self.L.ndim == 1: + self.L = tt.reshape(self.L, (-1, 1)) + self.K = 1 + else: + self.K = self.L.shape[1] + self.R = tt.dot(self.L, self.L.T) + else: + raise ValueError("One of 'R' and 'L' must be defined") + self.alpha2 = tt.diag(self.R) def __add__(self, b): @@ -104,12 +121,12 @@ def get_celerite_matrices(self, x, diag): a = tt.reshape( diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) ) - U = kron(U_sub, self._get_U_kron()) - V = kron(V_sub, self._get_V_kron()) + U = kron(U_sub, self.L) + V = kron(V_sub, self.L) c = tt.concatenate((cr, cc, cc)) P0 = tt.exp(-c[None, :] * dx[:, None]) - P = self._copy_P(P0) + P = tt.tile(P0[:, :, None], (1, 1, self.K)).reshape((P0.shape[0], -1)) return a, U, V, P @@ -118,43 +135,6 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) 
been implemented" ) - # The following should be implemented by subclasses - def _get_U_kron(self): - return self.R - - def _get_V_kron(self): - return tt.eye(self.M) - - def _copy_P(self, P0): - return tt.tile(P0[:, :, None], (1, 1, self.M)).reshape( - (P0.shape[0], -1) - ) - - -class LowRankKronTerm(KronTerm): - __doc__ = base_kron.LowRankKronTerm.__doc__ - - def __init__(self, term, *, alpha): - self.term = term - self.alpha = tt.as_tensor_variable(alpha) - if self.alpha.ndim != 1: - raise ValueError( - "alpha must be a vector; " - "use a general KronTerm for a full rank model" - ) - self.M = self.alpha.shape[0] - self.R = self.alpha[None, :] * self.alpha[:, None] - self.alpha2 = self.alpha ** 2 - - def _get_U_kron(self): - return self.alpha[:, None] - - def _get_V_kron(self): - return self.alpha[:, None] - - def _copy_P(self, P0): - return P0 - class KronTermSum(TermSumGeneral): __citations__ = CITATIONS diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 15e6a1f..8d18722 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -125,7 +125,7 @@ def test_low_rank_value(): alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) @@ -156,7 +156,7 @@ def test_sum_value(): R = np.dot(R, R.T) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 9a98d3e..8d27d3c 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -104,7 +104,7 @@ def test_low_rank_value(data): alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.eval().shape == (N * M,) @@ -130,7 +130,7 @@ def test_sum_value(data): R = np.dot(R, R.T) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.eval().shape == (N * M,) From 3865fbe1cb191ddcd648dbf1e50873c2430a17d1 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Mon, 2 Nov 2020 14:58:12 -0500 Subject: [PATCH 29/29] adding initial support for missing data --- python/celerite2/celerite2.py | 93 ++++++++++++++++++++----- python/celerite2/kron.py | 44 +++++++++--- python/celerite2/terms.py | 53 ++++++++++++-- python/celerite2/theano/kron.py | 25 ++++--- python/celerite2/theano/terms.py | 19 ++++-- python/test/test_celerite2.py | 3 - python/test/test_kron.py | 114 +++++++++++++++++++------------ python/test/theano/test_kron.py | 20 ++++++ 8 files changed, 276 insertions(+), 95 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index b8474de..4099ad1 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -38,6 +38,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._mask = None self._size = None self._shape = None self._log_det = -np.inf @@ -72,7 +73,14 
@@ def mean_value(self): return self._mean_value def compute( - self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False + self, + t, + *, + yerr=None, + diag=None, + mask=None, + check_sorted=True, + quiet=False, ): """Compute the Cholesky factorization of the GP covariance matrix @@ -117,7 +125,7 @@ def compute( self._shape = (N,) self._diag = np.empty(self._shape) - self._size = self._diag.size + self._full_size = self._diag.size if yerr is None and diag is None: self._diag[:] = 0.0 @@ -138,6 +146,10 @@ def compute( ) self._diag[:] = diag + self._mask = None + if mask is not None: + self._mask = np.ascontiguousarray(mask, dtype=bool) + # Fill the celerite matrices ( self._d, @@ -145,9 +157,18 @@ def compute( self._V, self._P, ) = self.kernel.get_celerite_matrices( - self._t, self._diag, a=self._d, U=self._U, V=self._V, P=self._P + self._t, + self._diag, + a=self._d, + U=self._U, + V=self._V, + P=self._P, + mask=self._mask, ) + # Get the correct size even with a mask + self._size = self._d.size + # Compute the Cholesky factorization try: self._d, self._W = driver.factor( @@ -183,7 +204,11 @@ def recompute(self, *, quiet=False): "You must call 'compute' directly at least once" ) return self.compute( - self._t, diag=self._diag, check_sorted=False, quiet=quiet + self._t, + diag=self._diag, + mask=self._mask, + check_sorted=False, + quiet=quiet, ) def _process_input(self, y, *, inplace=False, require_vector=False): @@ -212,9 +237,9 @@ def _process_input(self, y, *, inplace=False, require_vector=False): ) if len(self._shape) == 2: - y = np.reshape(y, (self._size,) + y.shape[2:]) + y = np.reshape(y, (self._full_size,) + y.shape[2:]) - if require_vector and (self._size,) != y.shape: + if require_vector and (self._full_size,) != y.shape: raise ValueError("'y' must be one dimensional") return y @@ -269,9 +294,16 @@ def dot_tril(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). 
""" y = self._process_input(y, inplace=inplace) - return self._reshape_output( - driver.dot_tril(self._U, self._P, self._d, self._W, y) - ) + + if self._mask is None: + alpha = driver.dot_tril(self._U, self._P, self._d, self._W, y) + else: + alpha = np.zeros((self._full_size,) + y.shape[1:]) + alpha[self._mask.flatten()] = driver.dot_tril( + self._U, self._P, self._d, self._W, y[self._mask.flatten()] + ) + + return self._reshape_output(alpha) def log_likelihood(self, y, *, inplace=False): """Compute the marginalized likelihood of the GP model @@ -298,9 +330,14 @@ def log_likelihood(self, y, *, inplace=False): ) if not np.isfinite(self._log_det): return -np.inf - loglike = self._norm - 0.5 * driver.norm( - self._U, self._P, self._d, self._W, y - ) + if self._mask is None: + loglike = self._norm - 0.5 * driver.norm( + self._U, self._P, self._d, self._W, y + ) + else: + loglike = self._norm - 0.5 * driver.norm( + self._U, self._P, self._d, self._W, y[self._mask.flatten()] + ) if not np.isfinite(loglike): return -np.inf return loglike @@ -349,7 +386,18 @@ def predict( y = self._process_input( y - mean_value, inplace=True, require_vector=True ) - alpha = driver.solve(self._U, self._P, self._d, self._W, np.copy(y)) + if self._mask is None: + alpha = driver.solve( + self._U, self._P, self._d, self._W, np.copy(y) + ) + else: + alpha = driver.solve( + self._U, + self._P, + self._d, + self._W, + np.copy(y[self._mask.flatten()]), + ) if t is None: xs = self._t @@ -364,7 +412,7 @@ def predict( if kernel is None: kernel = self.kernel - if t is None: + if t is None and self._mask is None: mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value @@ -399,6 +447,8 @@ def predict( if mu is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) + if self._mask is not None: + KxsT = KxsT[self._mask.flatten(), :] mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: mu += self._mean(xs) @@ -409,6 +459,8 @@ def predict( # Predictive variance. if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) + if self._mask is not None: + KxsT = KxsT[self._mask.flatten(), :] if return_var: var = np.squeeze( np.diag(kernel.get_value(0.0)) @@ -451,11 +503,18 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(*self._shape) + n = np.random.randn(self._size) else: - n = np.random.randn(*(self._shape + (size,))) + n = np.random.randn(self._size, size) + + n = driver.dot_tril(self._U, self._P, self._d, self._W, n) + + if self._mask is not None: + tmp = np.zeros((self._full_size,) + n.shape[1:]) + tmp[self._mask.flatten()] = n + n = tmp - result = self.dot_tril(n, inplace=True) + result = np.reshape(n, self._shape + n.shape[1:]) if size is not None: result = np.moveaxis(result, -1, 0) if include_mean: diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index c7a055d..0a22eab 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -4,6 +4,7 @@ import numpy as np +from . 
import driver from .terms import Term, TermSumGeneral @@ -114,7 +115,15 @@ def get_psd(self, omega): return self.term.get_psd(omega) def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -143,7 +152,14 @@ def get_celerite_matrices( N0, M = diag.shape if len(x) != N0 or M != self.M: raise ValueError("'diag' must have the shape (N, M)") - N = N0 * M + + if mask is None: + N = N0 * M + else: + mask = np.ascontiguousarray(mask, dtype=bool) + if mask.shape != (N0, M): + raise ValueError("'mask' must have the shape (N, M)") + N = mask.sum() # Compute the coefficients and kernel for the celerite subproblem ar, cr, ac, _, cc, _ = self.term.get_coefficients() @@ -171,16 +187,22 @@ def get_celerite_matrices( else: P.resize((N - 1, J), refcheck=False) - # Expand the times appropriately - x_full = np.tile(x[:, None], (1, self.M)).flatten() - dx = x_full[1:] - x_full[:-1] - - a[:] = ( - diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) - ).flatten() - U[:] = np.kron(U_sub, self.L) - V[:] = np.kron(V_sub, self.L) + if mask is None: + x_full = np.tile(x[:, None], (1, self.M)).flatten() + a[:] = ( + diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) + ).flatten() + U[:] = np.kron(U_sub, self.L) + V[:] = np.kron(V_sub, self.L) + else: + x_full = np.tile(x[:, None], (1, self.M))[mask].flatten() + a[:] = (diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)))[ + mask + ].flatten() + U[:] = np.kron(U_sub, self.L)[mask.flatten()] + V[:] = np.kron(V_sub, self.L)[mask.flatten()] + dx = x_full[1:] - x_full[:-1] c = np.concatenate((cr, cc, cc)) P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) if self.K > 1: diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index eafeadb..62e86ac 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -71,7 +71,7 @@ def to_dense(self, x, diag): K[np.diag_indices_from(K)] += diag return K - def dot(self, x, diag, y): + def dot(self, x, diag, y, **kwargs): """Apply a matrix-vector or matrix-matrix product Args: @@ -80,7 +80,7 @@ def dot(self, x, diag, y): y (shape[N] or shape[N, K]): The target of vector or matrix for this operation. """ - a, U, V, P = self.get_celerite_matrices(x, diag) + a, U, V, P = self.get_celerite_matrices(x, diag, **kwargs) y = np.atleast_1d(y) if y.shape[0] != len(a): @@ -153,7 +153,15 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -175,6 +183,11 @@ def get_celerite_matrices( Raises: ValueError: When the inputs are not valid. 
""" + if mask is not None: + raise NotImplementedError( + "Missing data is only implemented for KronTerm models" + ) + x = np.atleast_1d(x) diag = np.atleast_1d(diag) if len(x.shape) != 1: @@ -319,7 +332,15 @@ def get_psd(self, omega): return p def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -343,7 +364,13 @@ def get_celerite_matrices( """ x = np.atleast_1d(x) diag = np.atleast_1d(diag) - N = diag.size + + if mask is None: + N = diag.size + else: + mask = np.ascontiguousarray(mask, dtype=bool) + N = mask.sum() + sizes = [len(term) for term in self.terms] J = sum(sizes) @@ -369,6 +396,7 @@ def get_celerite_matrices( x, diag, a=a, + mask=mask, ) for dj, term in zip(sizes[1:], self.terms[1:]): ( @@ -379,6 +407,7 @@ def get_celerite_matrices( ) = term.get_celerite_matrices( x, np.zeros_like(diag), + mask=mask, ) a[:] += da j += dj @@ -524,7 +553,15 @@ def __len__(self): return len(self.term) def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): dt = self.delta ar, cr, a, b, c, d = self.term.get_coefficients() @@ -555,7 +592,9 @@ def get_celerite_matrices( new_diag = diag + delta_diag - return super().get_celerite_matrices(x, new_diag, a=a, U=U, V=V, P=P) + return super().get_celerite_matrices( + x, new_diag, a=a, U=U, V=V, P=P, mask=mask + ) def get_coefficients(self): ar, cr, a, b, c, d = self.term.get_coefficients() diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 9dd22b4..fe6d77f 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -101,7 +101,7 @@ def to_dense(self, x, diag): def get_psd(self, omega): return self.term.get_psd(omega) - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, mask=None): # Check the input dimensions x = tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) @@ -114,16 +114,25 @@ def get_celerite_matrices(self, x, diag): x, tt.zeros_like(x) ) - # Expand the times appropriately - x_full = tt.reshape(tt.tile(x[:, None], (1, self.M)), (-1,)) - dx = x_full[1:] - x_full[:-1] - - a = tt.reshape( - diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) - ) + # Pad everything out ignoring the mask + x_full = tt.tile(x[:, None], (1, self.M)) + a = diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)) U = kron(U_sub, self.L) V = kron(V_sub, self.L) + # Apply the mask or flatten + if mask is None: + x_full = tt.reshape(x_full, (-1,)) + a = tt.reshape(a, (-1,)) + else: + mask = tt.as_tensor_variable(mask) + mask_flat = tt.reshape(mask, (-1,)) + x_full = x_full[mask] + a = a[mask] + U = U[mask_flat] + V = V[mask_flat] + + dx = x_full[1:] - x_full[:-1] c = tt.concatenate((cr, cc, cc)) P0 = tt.exp(-c[None, :] * dx[:, None]) P = tt.tile(P0[:, :, None], (1, 1, self.K)).reshape((P0.shape[0], -1)) diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 90d7a8e..8c41fb4 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -85,7 +85,12 @@ def to_dense(self, x, diag): K += tt.diag(diag) return K - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): + if mask is not None: + raise NotImplementedError( + "Missing data is only implemented for KronTerm models" + ) + x = 
tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) ar, cr, ac, bc, cc, dc = self.coefficients @@ -132,8 +137,8 @@ def get_conditional_mean_matrices(self, x, t): return U_star, V_star, inds - def dot(self, x, diag, y): - a, U, V, P = self.get_celerite_matrices(x, diag) + def dot(self, x, diag, y, **kwargs): + a, U, V, P = self.get_celerite_matrices(x, diag, **kwargs) return ops.matmul(a, U, V, P, tt.as_tensor_variable(y))[0] @@ -195,7 +200,7 @@ def get_psd(self, omega): p += term.get_psd(omega) return p - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): x = tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) zeros = tt.zeros_like(diag) @@ -205,7 +210,7 @@ def get_celerite_matrices(self, x, diag): V = [] P = [] for term in self.terms: - args = term.get_celerite_matrices(x, zeros) + args = term.get_celerite_matrices(x, zeros, mask=mask) a.append(args[0]) U.append(args[1]) V.append(args[2]) @@ -344,7 +349,7 @@ def __init__(self, term, delta, **kwargs): self.delta = tt.as_tensor_variable(delta).astype("float64") super().__init__(**kwargs) - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): dt = self.delta ar, cr, a, b, c, d = self.term.coefficients @@ -373,7 +378,7 @@ def get_celerite_matrices(self, x, diag): ) new_diag = diag + delta_diag - return super().get_celerite_matrices(x, new_diag) + return super().get_celerite_matrices(x, new_diag, mask=mask) def get_coefficients(self): ar, cr, a, b, c, d = self.term.coefficients diff --git a/python/test/test_celerite2.py b/python/test/test_celerite2.py index 4b130d9..c41f26a 100644 --- a/python/test/test_celerite2.py +++ b/python/test/test_celerite2.py @@ -73,9 +73,6 @@ def test_consistency(oterm, mean, data): dict(return_cov=False, return_var=True), dict(return_cov=True, return_var=False), ]: - print(args) - print(original_gp.predict(y, **args)) - print(gp.predict(y, **args)) assert all( np.allclose(a, b) for a, b in zip( diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 8d18722..f93602c 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -1,14 +1,26 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest from celerite2 import GaussianProcess, kron, terms -def check_value(term, x, diag, y, t): +@pytest.fixture +def data(): + N = 100 + M = 5 + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + return N, M, x, diag, y, t + + +def check_value(term, x, diag, y, t, mask=None): N, M = diag.shape K = term.get_value(x[:, None] - x[None, :]) - try: K0 = term.term.get_value(x[:, None] - x[None, :]) except AttributeError: @@ -16,32 +28,48 @@ def check_value(term, x, diag, y, t): else: assert np.allclose(np.kron(K0, term.R), K) - K[np.diag_indices_from(K)] += diag.flatten() - K0 = term.dot(x, diag, np.eye(diag.size)) + mask0 = mask + if mask is not None: + K00 = K[:, mask.flatten()] + K = K00[mask.flatten(), :] + else: + K00 = np.copy(K) + mask = np.ones_like(diag, dtype=bool) + + K[np.diag_indices_from(K)] += diag.flatten()[mask.flatten()] + + K0 = term.dot(x, diag, np.eye(mask.sum()), mask=mask) assert np.allclose(K, K0) - assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + assert np.allclose( + np.dot(K, y[mask.flatten()]), + term.dot(x, diag, y[mask.flatten()], mask=mask), + ) - gp = GaussianProcess(term, t=x, diag=diag) + gp = GaussianProcess(term, t=x, 
diag=diag, mask=mask0) # "log_likelihood" method yval = y[:, 0].reshape((N, M)) - alpha = np.linalg.solve(K, y[:, 0]) + alpha = np.linalg.solve(K, y[mask.flatten(), 0]) loglike = -0.5 * ( - np.dot(y[:, 0], alpha) + np.dot(y[mask.flatten(), 0], alpha) + np.linalg.slogdet(K)[1] + len(K) * np.log(2 * np.pi) ) assert np.allclose(loglike, gp.log_likelihood(yval)) # Predict - K0 = K - np.diag(diag.flatten()) + K0 = K00 mu0 = np.dot(K0, alpha) - cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + cov0 = term.get_value(x[:, None] - x[None, :]) - np.dot( + K0, np.linalg.solve(K, K0.T) + ) mu, var = gp.predict(yval, return_var=True) _, cov = gp.predict(yval, return_cov=True) - assert np.allclose(mu, mu0.reshape((N, M))) - assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert mu.shape == (N, M) + assert var.shape == (N, M) + assert np.allclose(mu.flatten(), mu0) + assert np.allclose(var.flatten(), np.diag(cov0)) assert np.allclose(cov, cov0.reshape((N, M, N, M))) mu1, var1 = gp.predict(yval, t=x, return_var=True) @@ -50,7 +78,7 @@ def check_value(term, x, diag, y, t): assert np.allclose(var, var1) assert np.allclose(cov, cov1) - K0 = term.get_value(t[:, None] - x[None, :]) + K0 = term.get_value(t[:, None] - x[None, :])[:, mask.flatten()] mu0 = np.dot(K0, alpha) cov0 = term.get_value(t[:, None] - t[None, :]) - np.dot( K0, np.linalg.solve(K, K0.T) @@ -67,14 +95,15 @@ def check_value(term, x, diag, y, t): a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K))) np.random.seed(seed) b = gp.sample() - assert np.allclose(a.reshape((N, M)), b) + assert np.allclose(a, b[mask]) np.random.seed(seed) a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K), 10)) np.random.seed(seed) b = gp.sample(size=10) assert np.allclose( - np.ascontiguousarray(np.moveaxis(a, -1, 0)).reshape((10, N, M)), b + np.moveaxis(a, -1, 0), + b[:, mask], ) # "sample_conditional" method, numerics make this one a little unstable; @@ -86,15 +115,8 @@ def check_value(term, x, diag, y, t): assert b.shape == (10, len(t), M) -def test_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_value(data): + N, M, x, diag, y, t = data R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -113,15 +135,8 @@ def test_value(): check_value(term, x, diag, y, t) -def test_low_rank_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_low_rank_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) @@ -139,15 +154,8 @@ def test_low_rank_value(): assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) -def test_sum_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_sum_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) R = np.random.randn(M, M) @@ -165,3 +173,25 @@ def test_sum_value(): assert P.shape == (N * M - 1, 2 * M + 2) check_value(term, x, diag, y, t) + + +def test_missing_values(data): + N, M, x, diag, y, t = data + mask = np.random.rand(N, M) > 0.1 + assert np.all(mask.sum(axis=1) > 0) + + R = 
np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask) + assert a.shape == (mask.sum(),) + assert U.shape == (mask.sum(), 2 * M) + assert V.shape == (mask.sum(), 2 * M) + assert P.shape == (mask.sum() - 1, 2 * M) + + check_value(term, x, diag, y, t, mask=mask) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 8d27d3c..e42e2b1 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -149,3 +149,23 @@ def test_citations(data): gp = GaussianProcess(term, t=x, diag=diag) gp.marginal("obs", observed=y[:, 0].reshape((N, M))) assert model.__citations__["celerite2:kernel"] == kron.CITATIONS + + +def test_missing_values(data): + N, M, x, diag, y, t = data + mask = np.random.rand(N, M) > 0.1 + assert np.all(mask.sum(axis=1) > 0) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + K = term.get_value(x[:, None] - x[None, :]).eval() + K[np.diag_indices_from(K)] += diag.flatten() + K = K[mask.flatten(), :][:, mask.flatten()] + K0 = term.dot(x, diag, np.eye(mask.sum()), mask=mask).eval() + assert np.allclose(K0.shape, K.shape)
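
For reference, the snippet below is a minimal usage sketch of the interface that the last two patches converge on, assembled from the calls exercised in python/test/test_kron.py. The SHOTerm parameters, the random R matrix, and the 10% missing-data fraction are illustrative choices for this sketch, not values taken from the patches.

import numpy as np

from celerite2 import GaussianProcess, kron, terms

np.random.seed(105)
N, M = 100, 5                               # number of times and of bands
x = np.sort(np.random.uniform(0, 10, N))    # shared, sorted time grid
diag = np.random.uniform(0.1, 0.5, (N, M))  # per-observation variances
y = np.random.randn(N, M)                   # "rectangular" data set

# Full-rank cross-band covariance: any positive-definite M x M matrix
R = np.random.randn(M, M)
R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
R[np.triu_indices_from(R, 1)] = 0.0
R = np.dot(R, R.T)

term = kron.KronTerm(terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3), R=R)
# ...or pass a low-rank factor instead of the full matrix:
# term = kron.KronTerm(terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3), L=np.random.randn(M))

# Boolean mask flagging which (time, band) pairs were actually observed
mask = np.random.rand(N, M) > 0.1

gp = GaussianProcess(term, t=x, diag=diag, mask=mask)
print("log likelihood:", gp.log_likelihood(y))
mu, var = gp.predict(y, return_var=True)    # both come back with shape (N, M)

Note that the masked-out entries are dropped from the Cholesky factorization, but y and diag are still passed with the full (N, M) shape, matching how _process_input and the tests handle the mask in the final patch.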