From b2b8ae2cbd2d9b48f7237188aeff2a8d59421826 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 16 Sep 2020 17:10:49 -0400 Subject: [PATCH 01/29] starting to implement kron terms --- python/celerite2/kron.py | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 python/celerite2/kron.py diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py new file mode 100644 index 0000000..b63b5ff --- /dev/null +++ b/python/celerite2/kron.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from .terms import Term + + +class KronTerm(Term): + def __init__(self, term, *, R): + self.term = term + self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + self.M = len(self.R) + if self.R.ndim != 2 or self.R.shape != (self.M, self.M): + raise ValueError( + "R must be a square matrix; " + "use a LowRankKronTerm for the low rank model" + ) + + def __add__(self, b): + return KronTermSum(self, b) + + def __mul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def get_coefficients(self): + raise ValueError("KronTerm objects don't provide coefficients") + + def get_celerite_matrices( + self, x, diag, *, a=None, U=None, V=None, P=None + ): + # Check the input dimensions + x = np.atleast_1d(x) + diag = np.atleast_1d(diag) + N = len(diag) + if N != len(x) * self.M: + raise ValueError("'diag' must have the shape 'N * M'") + + # Compute the kernel for the celerite subproblem + a_sub, U_sub, V_sub, _ = self.term.get_celerite_matrices( + x, np.zeros_like(x) + ) + J = U_sub.shape[1] + + # Allocate memory as requested + if a is None: + a = np.empty(N) + else: + a.resize(N, refcheck=False) + if U is None: + U = np.empty((N, J)) + else: + U.resize((N, J), refcheck=False) + if V is None: + V = np.empty((N, J)) + else: + V.resize((N, J), refcheck=False) + if P is None: + P = np.empty((N - 1, J)) + else: + P.resize((N - 1, J), refcheck=False) + + # Expand the times appropriately + x_full = np.repeat(x, self.M) + dx = x_full[1:] - x_full[:-1] + a[:] = diag + + a[:] = diag + tt.diag(self.R)[:, None] * (tt.sum(ar) + tt.sum(ac)) + + # a = tt.reshape(a.T, (1, a.size))[0] + # U = tt.slinalg.kron(U, self.R) + # V = tt.slinalg.kron(V, tt.eye(self.R.shape[0])) + # c = tt.concatenate((cr, cc, cc)) + # P = tt.exp(-c[None, :] * dx[:, None]) + # P = tt.tile(P, (1, self.R.shape[0])) + + +class LowRankKronTerm(KronTerm): + pass + + +class KronTermSum(Term): + pass From c252a17ebcb52ecd37846fa4bf58f6301491fc42 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 16 Sep 2020 19:31:59 -0400 Subject: [PATCH 02/29] dealing with shapes --- python/celerite2/celerite2.py | 15 ++++++++----- python/celerite2/kron.py | 40 +++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 44fb0b6..03ed52f 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -38,6 +38,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._size = None self._log_det = -np.inf self._norm = np.inf @@ -103,7 +104,11 @@ def compute( # Save the diagonal self._t = np.ascontiguousarray(t, dtype=np.float64) self._mean_value = self._mean(self._t) - self._diag = np.empty_like(self._t) + try: + self._diag = np.empty((len(self._t), self.kernel.M)) + except AttributeError: + self._diag = np.empty_like(self._t) + self._size = self._diag.size if yerr is None and 
diag is None: self._diag[:] = 0.0 @@ -169,9 +174,9 @@ def _process_input(self, y, *, inplace=False, require_vector=False): y = np.atleast_1d(y) if self._t is None: raise RuntimeError("you must call 'compute' first") - if self._t.shape[0] != y.shape[0]: + if self._size != y.shape[0]: raise ValueError("dimension mismatch") - if require_vector and self._t.shape != y.shape: + if require_vector and (self._size,) != y.shape: raise ValueError("'y' must be one dimensional") if inplace: if ( @@ -386,9 +391,9 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(len(self._t)) + n = np.random.randn(self._size) else: - n = np.random.randn(len(self._t), size) + n = np.random.randn(self._size, size) result = self.dot_tril(n, inplace=True).T if include_mean: result += self._mean(self._t) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index b63b5ff..a4c2430 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -31,16 +31,19 @@ def get_celerite_matrices( ): # Check the input dimensions x = np.atleast_1d(x) - diag = np.atleast_1d(diag) - N = len(diag) - if N != len(x) * self.M: - raise ValueError("'diag' must have the shape 'N * M'") - - # Compute the kernel for the celerite subproblem - a_sub, U_sub, V_sub, _ = self.term.get_celerite_matrices( + diag = np.atleast_2d(diag) + N0, M = diag.shape + if len(x) != N0 or M != self.M: + raise ValueError("'diag' must have the shape (N, M)") + N = N0 * M + + # Compute the coefficients and kernel for the celerite subproblem + ar, cr, ac, _, cc, _ = self.term.get_coefficients() + _, U_sub, V_sub, _ = self.term.get_celerite_matrices( x, np.zeros_like(x) ) - J = U_sub.shape[1] + J0 = U_sub.shape[1] + J = J0 * self.M # Allocate memory as requested if a is None: @@ -63,16 +66,21 @@ def get_celerite_matrices( # Expand the times appropriately x_full = np.repeat(x, self.M) dx = x_full[1:] - x_full[:-1] - a[:] = diag - a[:] = diag + tt.diag(self.R)[:, None] * (tt.sum(ar) + tt.sum(ac)) + a[:] = ( + diag + np.diag(self.R)[None, :] * (np.sum(ar) + np.sum(ac)) + ).flatten() + U[:] = np.kron(U_sub, self.R) + V[:] = np.kron(V_sub, np.eye(self.M)) + + c = np.concatenate((cr, cc, cc)) + P[:, :J0] = np.exp(-c[None, :] * dx[:, None]) + P[:, J0:] = np.repeat(P[:, :J0], self.M - 1, axis=1) + + return a, U, V, P - # a = tt.reshape(a.T, (1, a.size))[0] - # U = tt.slinalg.kron(U, self.R) - # V = tt.slinalg.kron(V, tt.eye(self.R.shape[0])) - # c = tt.concatenate((cr, cc, cc)) - # P = tt.exp(-c[None, :] * dx[:, None]) - # P = tt.tile(P, (1, self.R.shape[0])) + def get_value(self, tau): + return np.kron(self.term.get_value(tau), self.R) class LowRankKronTerm(KronTerm): From bc93ea77760f4944257c8b72be489bb867ae8499 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 12:04:52 -0400 Subject: [PATCH 03/29] fixing order of columns in kron term --- python/celerite2/kron.py | 84 +++++++++++++++++++++++++++++++++++----- python/test/test_kron.py | 35 +++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 python/test/test_kron.py diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index a4c2430..d5c8122 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- + +__all__ = ["KronTerm"] + import numpy as np from .terms import Term @@ -16,7 +19,7 @@ def __init__(self, term, *, R): ) def __add__(self, b): - return KronTermSum(self, b) + raise 
NotImplementedError("addition is not implemented for KronTerm") def __mul__(self, b): raise NotImplementedError( @@ -24,14 +27,68 @@ def __mul__(self, b): ) def get_coefficients(self): - raise ValueError("KronTerm objects don't provide coefficients") + raise ValueError( + "KronTerm objects don't provide coefficients and they can't be " + "used in operations with other celerite2 terms" + ) + + def get_value(self, tau): + """Compute the value of the kernel as a function of lag + + Args: + tau (shape[...]): The lags where the kernel should be evaluated. + """ + return np.kron(self.term.get_value(tau), self.R) + + def to_dense(self, x, diag): + """Evaluate the dense covariance matrix for this term + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N, M]): The diagonal variance of the system. + """ + K = self.get_value(x[:, None] - x[None, :]) + K[np.diag_indices_from(K)] += np.ascontiguousarray(diag).flatten() + return K + + def get_psd(self, omega): + """Compute the value of the power spectral density for this process + + For Kronecker terms, the PSD computed is actually for the base term. + + Args: + omega (shape[...]): The (angular) frequencies where the power + should be evaluated. + """ + return self.term.get_psd(omega) def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): + """Get the matrices needed to solve the celerite system + + Pre-allocated arrays can be provided to the Python interface to be + re-used for multiple evaluations. + + .. note:: In-place operations are not supported by the modeling + extensions. + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N, M]): The diagonal variance of the system. + a (shape[N*M], optional): The diagonal of the A matrix. + U (shape[N*M, J*M], optional): The first low-rank matrix. + V (shape[N*M, J*M], optional): The second low-rank matrix. + P (shape[N*M-1, J], optional): The regularization matrix used for + numerical stability. + + Raises: + ValueError: When the inputs are not valid. + """ + # Check the input dimensions - x = np.atleast_1d(x) - diag = np.atleast_2d(diag) + x = np.ascontiguousarray(np.atleast_1d(x)) + diag = np.ascontiguousarray(np.atleast_2d(diag)) N0, M = diag.shape if len(x) != N0 or M != self.M: raise ValueError("'diag' must have the shape (N, M)") @@ -64,7 +121,7 @@ def get_celerite_matrices( P.resize((N - 1, J), refcheck=False) # Expand the times appropriately - x_full = np.repeat(x, self.M) + x_full = np.tile(x[:, None], (1, self.M)).flatten() dx = x_full[1:] - x_full[:-1] a[:] = ( @@ -74,13 +131,22 @@ def get_celerite_matrices( V[:] = np.kron(V_sub, np.eye(self.M)) c = np.concatenate((cr, cc, cc)) - P[:, :J0] = np.exp(-c[None, :] * dx[:, None]) - P[:, J0:] = np.repeat(P[:, :J0], self.M - 1, axis=1) + P0 = np.exp(-c[None, :] * dx[:, None]) + P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape((-1, J)) return a, U, V, P - def get_value(self, tau): - return np.kron(self.term.get_value(tau), self.R) + def get_conditional_mean_matrices(self, x, t): + """Get the matrices needed to compute the conditional mean function + + Args: + x (shape[N]): The independent coordinates of the data. + t (shape[M]): The independent coordinates where the predictions + will be made. + """ + raise NotImplementedError( + "Conditional mean matrices have not (yet!) 
been implemented" + ) class LowRankKronTerm(KronTerm): diff --git a/python/test/test_kron.py b/python/test/test_kron.py new file mode 100644 index 0000000..8f2edd1 --- /dev/null +++ b/python/test/test_kron.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from celerite2 import kron, terms + + +def test_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M) + eye = np.eye(N * M) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2 * M) + assert V.shape == (N * M, 2 * M) + assert P.shape == (N * M - 1, 2 * M) + + tau = x - x[0] + K = term.get_value(tau[:, None] - tau[None, :]) + K[np.diag_indices_from(K)] += diag.flatten() + K0 = term.dot(x, diag, eye) + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) From 1a868877219a0547e0d7812f94c108881d358b39 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 13:47:53 -0400 Subject: [PATCH 04/29] caching theano directory --- .github/workflows/python.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e24d5c3..c1550a9 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,6 +73,12 @@ jobs: with: python-version: 3.8 auto-update-conda: true + - name: "Cache ~/.theano" + uses: actions/cache@v2 + with: + path: ~/.theano + key: theano-${{ hashFiles('python/test/theano/*.py') }} + restore-keys: theano- - name: Install dependencies shell: bash -l {0} run: | From c122d8c624f84085feec6fea76b6eec637ddc143 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 13:54:06 -0400 Subject: [PATCH 05/29] cache based on compiledir --- .github/workflows/python.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c1550a9..e20ba51 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,18 +73,24 @@ jobs: with: python-version: 3.8 auto-update-conda: true - - name: "Cache ~/.theano" - uses: actions/cache@v2 - with: - path: ~/.theano - key: theano-${{ hashFiles('python/test/theano/*.py') }} - restore-keys: theano- - name: Install dependencies shell: bash -l {0} run: | conda install -q numpy scipy theano mkl-service python -m pip install -U pip python -m pip install --use-feature=2020-resolver -e ".[test,theano]" + - name: Get theano compiledir + id: compiledir + run: | + python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" + - name: "Cache ~/.theano" + uses: actions/cache@v2 + with: + path: ~/.theano + key: theano-${{ steps.compiledir.outputs.compiledir }}-${{ hashFiles('python/test/theano/*.py') }} + restore-keys: | + theano-${{ steps.compiledir.outputs.compiledir }}- + theano- - name: Run the unit tests shell: bash -l {0} run: python -m pytest --cov celerite2 python/test/theano From 93888d8e07cd2f21588767166c04108e2f77649e Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 14:00:27 -0400 Subject: [PATCH 06/29] forgotten shell command --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff 
--git a/.github/workflows/python.yml b/.github/workflows/python.yml index e20ba51..421a2f6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -81,6 +81,7 @@ jobs: python -m pip install --use-feature=2020-resolver -e ".[test,theano]" - name: Get theano compiledir id: compiledir + shell: bash -l {0} run: | python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" - name: "Cache ~/.theano" From a7a4791ce69716f3c4b536ba2fba76b97dddae95 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 14:30:29 -0400 Subject: [PATCH 07/29] adding implementation of low rank kron --- python/celerite2/kron.py | 54 ++++++++++++++++++++++++++++++++++------ python/test/test_kron.py | 45 +++++++++++++++++++++++++++------ 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index d5c8122..d37be0d 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -17,6 +17,7 @@ def __init__(self, term, *, R): "R must be a square matrix; " "use a LowRankKronTerm for the low rank model" ) + self.alpha2 = np.diag(self.R) def __add__(self, b): raise NotImplementedError("addition is not implemented for KronTerm") @@ -79,7 +80,7 @@ def get_celerite_matrices( a (shape[N*M], optional): The diagonal of the A matrix. U (shape[N*M, J*M], optional): The first low-rank matrix. V (shape[N*M, J*M], optional): The second low-rank matrix. - P (shape[N*M-1, J], optional): The regularization matrix used for + P (shape[N*M-1, J*M], optional): The regularization matrix used for numerical stability. Raises: @@ -100,7 +101,7 @@ def get_celerite_matrices( x, np.zeros_like(x) ) J0 = U_sub.shape[1] - J = J0 * self.M + J = self._get_J(J0) # Allocate memory as requested if a is None: @@ -125,14 +126,14 @@ def get_celerite_matrices( dx = x_full[1:] - x_full[:-1] a[:] = ( - diag + np.diag(self.R)[None, :] * (np.sum(ar) + np.sum(ac)) + diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) ).flatten() - U[:] = np.kron(U_sub, self.R) - V[:] = np.kron(V_sub, np.eye(self.M)) + U[:] = np.kron(U_sub, self._get_U_kron()) + V[:] = np.kron(V_sub, self._get_V_kron()) c = np.concatenate((cr, cc, cc)) - P0 = np.exp(-c[None, :] * dx[:, None]) - P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape((-1, J)) + P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) + self._copy_P(P0, P) return a, U, V, P @@ -148,9 +149,46 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) 
been implemented" ) + # The following should be implemented by subclasses + def _get_J(self, J0): + return self.M * J0 + + def _get_U_kron(self): + return self.R + + def _get_V_kron(self): + return np.eye(self.M) + + def _copy_P(self, P0, P): + P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape(P.shape) + class LowRankKronTerm(KronTerm): - pass + def __init__(self, term, *, alpha): + self.term = term + self.alpha = np.ascontiguousarray( + np.atleast_1d(alpha), dtype=np.float64 + ) + if self.alpha.ndim != 1: + raise ValueError( + "alpha must be a vector; " + "use a general KronTerm for a full rank model" + ) + self.M = len(self.alpha) + self.R = np.outer(self.alpha, self.alpha) + self.alpha2 = self.alpha ** 2 + + def _get_J(self, J0): + return J0 + + def _get_U_kron(self): + return self.alpha[:, None] + + def _get_V_kron(self): + return self.alpha[:, None] + + def _copy_P(self, P0, P): + pass class KronTermSum(Term): diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 8f2edd1..2c8e248 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -4,6 +4,15 @@ from celerite2 import kron, terms +def check_value(term, x, diag, y): + tau = x - x[0] + K = term.get_value(tau[:, None] - tau[None, :]) + K[np.diag_indices_from(K)] += diag.flatten() + K0 = term.dot(x, diag, np.eye(diag.size)) + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + + def test_value(): N = 100 M = 5 @@ -11,8 +20,7 @@ def test_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M) - eye = np.eye(N * M) + y = np.random.randn(N * M, 3) R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -21,15 +29,36 @@ def test_value(): term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) term = kron.KronTerm(term0, R=R) + a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) assert U.shape == (N * M, 2 * M) assert V.shape == (N * M, 2 * M) assert P.shape == (N * M - 1, 2 * M) - tau = x - x[0] - K = term.get_value(tau[:, None] - tau[None, :]) - K[np.diag_indices_from(K)] += diag.flatten() - K0 = term.dot(x, diag, eye) - assert np.allclose(K, K0) - assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + check_value(term, x, diag, y) + + +def test_low_rank_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2) + assert V.shape == (N * M, 2) + assert P.shape == (N * M - 1, 2) + + check_value(term, x, diag, y) + + full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) + assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) From 12e3883f189fe3d4133161a9375428545a6d1e7d Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 16:08:22 -0400 Subject: [PATCH 08/29] implementing kron term sum --- python/celerite2/kron.py | 31 ++++- python/celerite2/terms.py | 261 ++++++++++++++++++++++++++++++++------ python/test/test_kron.py | 27 ++++ python/test/test_terms.py | 64 ++++++++-- 4 files changed, 326 insertions(+), 57 deletions(-) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index d37be0d..5284bdb 100644 --- a/python/celerite2/kron.py +++ 
b/python/celerite2/kron.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm"] +__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] import numpy as np -from .terms import Term +from .terms import Term, TermSumGeneral class KronTerm(Term): + __compat__ = "kronecker" + __requires_general_addition__ = True + def __init__(self, term, *, R): self.term = term self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) @@ -19,14 +22,29 @@ def __init__(self, term, *, R): ) self.alpha2 = np.diag(self.R) + def __len__(self): + return len(self.term) * self.M + def __add__(self, b): - raise NotImplementedError("addition is not implemented for KronTerm") + if b.__compat__ != self.__compat__: + raise TypeError("KronTerms can only be added to other KronTerms") + return KronTermSum(self, b) + + def __radd__(self, b): + if b.__compat__ != self.__compat__: + raise TypeError("KronTerms can only be added to other KronTerms") + return KronTermSum(b, self) def __mul__(self, b): raise NotImplementedError( "multiplication is not implemented for KronTerm" ) + def __rmul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + def get_coefficients(self): raise ValueError( "KronTerm objects don't provide coefficients and they can't be " @@ -178,6 +196,9 @@ def __init__(self, term, *, alpha): self.R = np.outer(self.alpha, self.alpha) self.alpha2 = self.alpha ** 2 + def __len__(self): + return len(self.term) + def _get_J(self, J0): return J0 @@ -191,5 +212,5 @@ def _copy_P(self, P0, P): pass -class KronTermSum(Term): - pass +class KronTermSum(TermSumGeneral): + __compat__ = "kronecker" diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 72f7d04..7d99b92 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -2,6 +2,7 @@ __all__ = [ "Term", + "TermSumGeneral", "TermSum", "TermProduct", "TermDiff", @@ -30,16 +31,61 @@ class Term: """ + __compat__ = None + __requires_general_addition__ = False + def __add__(self, b): + if self.__compat__ != b.__compat__: + raise TypeError("Incompatible terms") + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(self, b) return TermSum(self, b) def __mul__(self, b): + if self.__compat__ != b.__compat__: + raise TypeError("Incompatible terms") return TermProduct(self, b) + def __len__(self): + r, _, c, _, _, _ = self.get_coefficients() + return len(r) + 2 * len(c) + @property def terms(self): return [self] + def to_dense(self, x, diag): + """Evaluate the dense covariance matrix for this term + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + """ + K = self.get_value(x[:, None] - x[None, :]) + K[np.diag_indices_from(K)] += diag + return K + + def dot(self, x, diag, y): + """Apply a matrix-vector or matrix-matrix product + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + y (shape[N] or shape[N, K]): The target of vector or matrix for + this operation. 
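+
+        A minimal usage sketch (the kernel parameters and data below are
+        arbitrary placeholders, not taken from a real dataset)::
+
+            import numpy as np
+            from celerite2 import terms
+
+            term = terms.SHOTerm(sigma=1.0, rho=2.5, Q=3.0)
+            x = np.linspace(0.0, 10.0, 50)
+            diag = np.full_like(x, 0.1)
+            y = np.sin(x)
+
+            # Equivalent to np.dot(K, y) with K = term.get_value(x[:, None] - x[None, :])
+            # plus diag added to its diagonal, but with cost linear in N
+            z = term.dot(x, diag, y)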
+ """ + a, U, V, P = self.get_celerite_matrices(x, diag) + + y = np.atleast_1d(y) + if y.shape[0] != len(a): + raise ValueError("dimension mismatch") + + z = np.empty(y.shape) + return driver.matmul(a, U, V, P, y, z) + def get_coefficients(self): """Compute and return the coefficients for the celerite model @@ -103,17 +149,6 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd - def to_dense(self, x, diag): - """Evaluate the dense covariance matrix for this term - - Args: - x (shape[N]): The independent coordinates of the data. - diag (shape[N]): The diagonal variance of the system. - """ - K = self.get_value(x[:, None] - x[None, :]) - K[np.diag_indices_from(K)] += diag - return K - def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): @@ -195,24 +230,6 @@ def get_conditional_mean_matrices(self, x, t): return U_star, V_star, inds - def dot(self, x, diag, y): - """Apply a matrix-vector or matrix-matrix product - - Args: - x (shape[N]): The independent coordinates of the data. - diag (shape[N]): The diagonal variance of the system. - y (shape[N] or shape[N, K]): The target of vector or matrix for - this operation. - """ - a, U, V, P = self.get_celerite_matrices(x, diag) - - y = np.atleast_1d(y) - if y.shape[0] != len(a): - raise ValueError("dimension mismatch") - - z = np.empty(y.shape) - return driver.matmul(a, U, V, P, y, z) - class TermSum(Term): """A sum of multiple :class:`Term` objects @@ -226,10 +243,10 @@ class TermSum(Term): """ def __init__(self, *terms): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms @@ -242,6 +259,146 @@ def get_coefficients(self): return tuple(np.concatenate(c) for c in zip(*coeffs)) +class TermSumGeneral(Term): + """A general sum of multiple :class:`Term` objects + + This will be used if any of the terms require "general addition", + or if they don't provide a ``get_coefficients`` method. + + Args: + *terms: Any number of :class:`Term` subclasses to add together. + """ + + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + def __len__(self): + return sum(map(len, self.terms)) + + def get_value(self, tau): + """Compute the value of the kernel as a function of lag + + Args: + tau (shape[...]): The lags where the kernel should be evaluated. + """ + K = self.terms[0].get_value(tau) + for term in self.terms[1:]: + K += term.get_value(tau) + return K + + def get_psd(self, omega): + """Compute the value of the power spectral density for this process + + Args: + omega (shape[...]): The (angular) frequencies where the power + should be evaluated. 
+ """ + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices( + self, x, diag, *, a=None, U=None, V=None, P=None + ): + """Get the matrices needed to solve the celerite system + + Pre-allocated arrays can be provided to the Python interface to be + re-used for multiple evaluations. + + .. note:: In-place operations are not supported by the modeling + extensions. + + Args: + x (shape[N]): The independent coordinates of the data. + diag (shape[N]): The diagonal variance of the system. + a (shape[N], optional): The diagonal of the A matrix. + U (shape[N, J], optional): The first low-rank matrix. + V (shape[N, J], optional): The second low-rank matrix. + P (shape[N-1, J], optional): The regularization matrix used for + numerical stability. + + Raises: + ValueError: When the inputs are not valid. + """ + x = np.atleast_1d(x) + diag = np.atleast_1d(diag) + N = diag.size + sizes = [len(term) for term in self.terms] + J = sum(sizes) + + if a is None: + a = np.empty(N) + else: + a.resize(N, refcheck=False) + if U is None: + U = np.empty((N, J)) + else: + U.resize((N, J), refcheck=False) + if V is None: + V = np.empty((N, J)) + else: + V.resize((N, J), refcheck=False) + if P is None: + P = np.empty((N - 1, J)) + else: + P.resize((N - 1, J), refcheck=False) + + j = sizes[0] + _, U[:, :j], V[:, :j], P[:, :j] = self.terms[0].get_celerite_matrices( + x, + diag, + a=a, + ) + for dj, term in zip(sizes[1:], self.terms[1:]): + ( + da, + U[:, j : j + dj], + V[:, j : j + dj], + P[:, j : j + dj], + ) = term.get_celerite_matrices( + x, + np.zeros_like(diag), + ) + a[:] += da + j += dj + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + """Get the matrices needed to compute the conditional mean function + + Args: + x (shape[N]): The independent coordinates of the data. + t (shape[M]): The independent coordinates where the predictions + will be made. + """ + Us = [] + Vs = [] + for term in self.terms: + U, V, inds = term.get_conditional_mean_matrices(x, t) + Us.append(U) + Vs.append(V) + return np.concatenate(Us, axis=1), np.concatenate(Vs, axis=1), inds + + class TermProduct(Term): """A product of two :class:`Term` objects @@ -255,12 +412,13 @@ class TermProduct(Term): """ def __init__(self, term1, term2): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -316,13 +474,16 @@ class TermDiff(Term): """ def __init__(self, term): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an TermConvolution, it must " - "be the outer term in the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term + def __len__(self): + return len(self.term) + def get_coefficients(self): coeffs = self.term.get_coefficients() a, b, c, d = coeffs[2:] @@ -350,10 +511,15 @@ class TermConvolution(Term): exposure time). 
""" + __requires_general_addition__ = True + def __init__(self, term, delta): self.term = term self.delta = float(delta) + def __len__(self): + return len(self.term) + def get_celerite_matrices( self, x, diag, *, a=None, U=None, V=None, P=None ): @@ -523,6 +689,9 @@ def __init__(self, *, a, c): self.a = float(a) self.c = float(c) + def __len__(self): + return 1 + def get_coefficients(self): e = np.empty(0) return np.array([self.a]), np.array([self.c]), e, e, e, e @@ -564,6 +733,9 @@ def __init__(self, *, a, b, c, d): self.c = float(c) self.d = float(d) + def __len__(self): + return 2 + def get_coefficients(self): e = np.empty(0) return ( @@ -658,6 +830,9 @@ def get_test_parameters(): def __init__(self, *, eps=1e-5): self.eps = float(eps) + def __len__(self): + return 2 + def overdamped(self): Q = self.Q f = np.sqrt(np.maximum(1.0 - 4.0 * Q ** 2, self.eps)) @@ -734,6 +909,9 @@ def __init__(self, *, sigma, rho, eps=0.01): self.rho = float(rho) self.eps = float(eps) + def __len__(self): + return 2 + def get_coefficients(self): w0 = np.sqrt(3) / self.rho S0 = self.sigma ** 2 / w0 @@ -814,6 +992,9 @@ def __init__(self, *, sigma, period, Q0, dQ, f): SHOTerm(S0=S1, w0=w1, Q=Q1), SHOTerm(S0=S2, w0=w2, Q=Q2) ) + def __len__(self): + return 4 + class OriginalCeleriteTerm(Term): """A wrapper around terms defined using the original celerite package diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 2c8e248..303eb18 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -62,3 +62,30 @@ def test_low_rank_value(): full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) + + +def test_sum_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.shape == (N * M,) + assert U.shape == (N * M, 2 * M + 2) + assert V.shape == (N * M, 2 * M + 2) + assert P.shape == (N * M - 1, 2 * M + 2) + + check_value(term, x, diag, y) diff --git a/python/test/test_terms.py b/python/test/test_terms.py index 4ec2772..28c87bf 100644 --- a/python/test/test_terms.py +++ b/python/test/test_terms.py @@ -78,18 +78,7 @@ def _convert_kernel(celerite_kernel): raise NotImplementedError() -@pytest.mark.parametrize("oterm", test_terms) -def test_consistency(oterm): - # Check that the coefficients are all correct - term = _convert_kernel(oterm) - for v1, v2 in zip(oterm.get_all_coefficients(), term.get_coefficients()): - assert np.allclose(v1, v2) - for v1, v2 in zip( - terms.OriginalCeleriteTerm(oterm).get_coefficients(), - term.get_coefficients(), - ): - assert np.allclose(v1, v2) - +def check_term(oterm, term): # Make sure that the covariance matrix is right np.random.seed(40582) x = np.sort(np.random.uniform(0, 10, 50)) @@ -117,3 +106,54 @@ def test_consistency(oterm): y = np.vstack([x]).T value = term.dot(x, diag, y) assert np.allclose(value, np.dot(K, y)) + + +@pytest.mark.parametrize("oterm", test_terms) +def test_consistency(oterm): + # Check that the coefficients are all correct + term = _convert_kernel(oterm) + for v1, v2 in 
zip(oterm.get_all_coefficients(), term.get_coefficients()): + assert np.allclose(v1, v2) + for v1, v2 in zip( + terms.OriginalCeleriteTerm(oterm).get_coefficients(), + term.get_coefficients(), + ): + assert np.allclose(v1, v2) + + check_term(oterm, term) + + +def test_general_sum(): + term1 = terms.SHOTerm(sigma=1.0, rho=0.3, Q=0.1) + term2 = terms.SHOTerm(sigma=1.5, rho=3.3, Q=0.5) + term3 = terms.SHOTerm(sigma=0.3, rho=1.5, Q=2.0) + oterm = term1 + term2 + term3 + + term2.__requires_general_addition__ = True + term3.__requires_general_addition__ = True + term = term1 + term2 + term3 + assert isinstance(oterm, terms.TermSum) + assert isinstance(term, terms.TermSumGeneral) + + np.random.seed(40582) + x = np.sort(np.random.uniform(0, 10, 50)) + t = np.sort(np.random.uniform(-1, 11, 100)) + diag = np.random.uniform(0.1, 0.3, len(x)) + + for n, (a, b) in enumerate( + zip( + oterm.get_celerite_matrices(x, diag), + term.get_celerite_matrices(x, diag), + ) + ): + assert np.allclose(a, b) + + for n, (a, b) in enumerate( + zip( + oterm.get_conditional_mean_matrices(x, t), + term.get_conditional_mean_matrices(x, t), + ) + ): + assert np.allclose(a, b) + + check_term(oterm, term) From 3289b493825be589148c663cb0b857bda3df9819 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:24:21 -0400 Subject: [PATCH 09/29] skipping termsumgeneral --- python/test/theano/test_terms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index 1fbcb8a..51da7d5 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -27,7 +27,7 @@ def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: - if name == "OriginalCeleriteTerm": + if name == "OriginalCeleriteTerm" or name == "TermSumGeneral": continue term = getattr(terms, name) if name.startswith("Term"): From 303bdf1c1d613f6b3a9b2b54774ab275dc88a610 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:25:29 -0400 Subject: [PATCH 10/29] xfail --- python/test/theano/test_terms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index 51da7d5..ebe4f50 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -24,10 +24,11 @@ compare_terms = partial(check_tensor_term, lambda x: x.eval()) +@pytest.mark.xfail(reason="TermSumGeneral not yet implemented") def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: - if name == "OriginalCeleriteTerm" or name == "TermSumGeneral": + if name == "OriginalCeleriteTerm": continue term = getattr(terms, name) if name.startswith("Term"): From 6be5390f7b13bf6e109878153dcf01110301e7d6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 14 Oct 2020 18:26:45 -0400 Subject: [PATCH 11/29] cache theano for tutorials too --- .github/workflows/tutorials.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index e93ff60..2f16822 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -29,6 +29,20 @@ jobs: python -m pip install -U pip python -m pip install --use-feature=2020-resolver ".[tutorials]" + - name: Get theano compiledir + id: compiledir + run: | + python -c "import theano; print('::set-output name=compiledir::' + theano.config.compiledir.split('/')[-1])" + + - name: "Cache ~/.theano" + uses: actions/cache@v2 
+ with: + path: ~/.theano + key: tutorials-${{ steps.compiledir.outputs.compiledir }}-${{ hashFiles('docs/tutorials/*.py') }} + restore-keys: | + tutorials-${{ steps.compiledir.outputs.compiledir }}- + tutorials- + - name: Execute the notebooks run: | jupytext --to ipynb --execute docs/tutorials/*.py From c746b9f83221e71ffaf9ef1188916d5cc0600d70 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 15 Oct 2020 13:49:30 -0400 Subject: [PATCH 12/29] handling latent dimensions generally --- python/celerite2/celerite2.py | 133 +++++++++++++++++++++++----------- python/celerite2/kron.py | 21 ++++-- python/celerite2/terms.py | 13 ++-- python/test/test_celerite2.py | 3 + 4 files changed, 118 insertions(+), 52 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 03ed52f..4b86bf7 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -65,7 +65,7 @@ def mean(self, mean): @property def mean_value(self): if self._mean_value is None: - raise AttributeError( + raise RuntimeError( "'compute' must be executed before accessing mean_value" ) return self._mean_value @@ -97,30 +97,45 @@ def compute( # Check the input coordinates t = np.atleast_1d(t) if check_sorted and np.any(np.diff(t) < 0.0): - raise ValueError("the input coordinates must be sorted") + raise ValueError("The input coordinates must be sorted") if len(t.shape) != 1: - raise ValueError("the input coordinates must be one dimensional") + raise ValueError("The input coordinates must be one dimensional") # Save the diagonal self._t = np.ascontiguousarray(t, dtype=np.float64) + if self._t.ndim != 1: + raise ValueError("Input 't' must be 1D") self._mean_value = self._mean(self._t) - try: - self._diag = np.empty((len(self._t), self.kernel.M)) - except AttributeError: - self._diag = np.empty_like(self._t) + + # Check the dimensions + N = len(self._t) + M = self.kernel.dimension + if M: + self._shape = (N, M) + else: + self._shape = (N,) + + self._diag = np.empty(self._shape) self._size = self._diag.size if yerr is None and diag is None: self._diag[:] = 0.0 - elif yerr is not None: - if diag is not None: + else: + if yerr is not None: + if diag is not None: + raise ValueError( + "Only one of 'diag' and 'yerr' can be provided" + ) + diag = np.atleast_1d(yerr) ** 2 + else: + diag = np.atleast_1d(diag) + + if diag.shape != self._shape: raise ValueError( - "only one of 'diag' and 'yerr' can be provided" + f"Invalid shape for 'diag'; expected {self._shape}, got " + f"{diag.shape}" ) - self._diag[:] = np.atleast_1d(yerr) ** 2 - - else: - self._diag[:] = np.atleast_1d(diag) + self._diag[:] = diag # Fill the celerite matrices ( @@ -145,7 +160,7 @@ def compute( else: self._log_det = np.sum(np.log(self._d)) self._norm = -0.5 * ( - self._log_det + len(self._t) * np.log(2 * np.pi) + self._log_det + self._size * np.log(2 * np.pi) ) def recompute(self, *, quiet=False): @@ -164,20 +179,17 @@ def recompute(self, *, quiet=False): """ if self._t is None: raise RuntimeError( - "you must call 'compute' directly at least once" + "You must call 'compute' directly at least once" ) return self.compute( self._t, diag=self._diag, check_sorted=False, quiet=quiet ) def _process_input(self, y, *, inplace=False, require_vector=False): - y = np.atleast_1d(y) if self._t is None: - raise RuntimeError("you must call 'compute' first") - if self._size != y.shape[0]: - raise ValueError("dimension mismatch") - if require_vector and (self._size,) != y.shape: - raise ValueError("'y' must be one dimensional") + raise 
RuntimeError("You must call 'compute' first") + + y = np.atleast_1d(y) if inplace: if ( y.dtype != "float64" @@ -191,8 +203,26 @@ def _process_input(self, y, *, inplace=False, require_vector=False): y = np.ascontiguousarray(y, dtype=np.float64) else: y = np.array(y, dtype=np.float64, copy=True, order="C") + + if self._shape != y.shape[: len(self._shape)]: + raise ValueError( + f"Dimension mismatch; expected {self._shape}..., " + f"got {y.shape[:len(self._shape)]}..." + ) + + if len(self._shape) == 2: + y = np.reshape(y, (self._size,) + y.shape[2:]) + + if require_vector and (self._size,) != y.shape: + raise ValueError("'y' must be one dimensional") + return y + def _reshape_output(self, y): + if len(self._shape) == 1: + return y + return np.reshape(y, (-1, self._shape[1]) + y.shape[1:]) + def apply_inverse(self, y, *, inplace=False): """Apply the inverse of the covariance matrix to a vector or matrix @@ -213,7 +243,9 @@ def apply_inverse(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). """ y = self._process_input(y, inplace=inplace) - return driver.solve(self._U, self._P, self._d, self._W, y) + return self._reshape_output( + driver.solve(self._U, self._P, self._d, self._W, y) + ) def dot_tril(self, y, *, inplace=False): """Dot the Cholesky factor of the GP system into a vector or matrix @@ -235,7 +267,9 @@ def dot_tril(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). """ y = self._process_input(y, inplace=inplace) - return driver.dot_tril(self._U, self._P, self._d, self._W, y) + return self._reshape_output( + driver.dot_tril(self._U, self._P, self._d, self._W, y) + ) def log_likelihood(self, y, *, inplace=False): """Compute the marginalized likelihood of the GP model @@ -257,11 +291,13 @@ def log_likelihood(self, y, *, inplace=False): first. ValueError: When the inputs are not valid (shape, number, etc.). """ - y = self._process_input(y, inplace=inplace, require_vector=True) + y = self._process_input( + y - self.mean_value, inplace=inplace, require_vector=True + ) if not np.isfinite(self._log_det): return -np.inf loglike = self._norm - 0.5 * driver.norm( - self._U, self._P, self._d, self._W, y - self._mean(self._t) + self._U, self._P, self._d, self._W, y ) if not np.isfinite(loglike): return -np.inf @@ -307,11 +343,11 @@ def predict( first. ValueError: When the inputs are not valid (shape, number, etc.). 
""" - y = self._process_input(y, inplace=True, require_vector=True) - mean_value = self._mean(self._t) - alpha = driver.solve( - self._U, self._P, self._d, self._W, y - mean_value + mean_value = self.mean_value + y = self._process_input( + y - mean_value, inplace=True, require_vector=True ) + alpha = driver.solve(self._U, self._P, self._d, self._W, np.copy(y)) if t is None: xs = self._t @@ -326,9 +362,9 @@ def predict( kernel = self.kernel if t is None: - mu = y - self._diag * alpha - if not include_mean: - mu -= mean_value + mu = self._reshape_output(y - self._diag.flatten() * alpha) + if include_mean: + mu += mean_value else: ( @@ -340,13 +376,13 @@ def predict( mu = driver.conditional_mean( self._U, self._V, self._P, alpha, U_star, V_star, inds, mu ) - + mu = self._reshape_output(mu) if include_mean: mu += self._mean(xs) else: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) - mu = np.dot(KxsT.T, alpha) + mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: mu += self._mean(xs) @@ -360,14 +396,19 @@ def predict( var = kernel.get_value(0.0) - np.einsum( "ij,ij->j", KxsT, self.apply_inverse(KxsT, inplace=False) ) - return mu, var + return mu, self._reshape_output(var) # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= np.tensordot( KxsT, self.apply_inverse(KxsT, inplace=False), axes=(0, 0) ) - return mu, cov + if len(self._shape) == 1: + return mu, cov + + m = len(xs) + M = self._shape[1] + return mu, np.reshape(cov, (m, M, m, M)) def sample(self, *, size=None, include_mean=True): """Generate random samples from the prior implied by the GP system @@ -391,10 +432,10 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(self._size) + n = np.random.randn(*self._shape) else: - n = np.random.randn(self._size, size) - result = self.dot_tril(n, inplace=True).T + n = np.random.randn(*(self._shape + (size,))) + result = np.moveaxis(self.dot_tril(n, inplace=True), -1, 0) if include_mean: result += self._mean(self._t) return result @@ -438,6 +479,16 @@ def sample_conditional( mu, cov = self.predict( y, t, return_cov=True, include_mean=include_mean, kernel=kernel ) + + if len(self._shape) == 2: + mu = mu.flatten() + m = cov.shape[0] * cov.shape[1] + cov = np.reshape(cov, (m, m)) + if regularize is not None: cov[np.diag_indices_from(cov)] += regularize - return np.random.multivariate_normal(mu, cov, size=size) + + samp = np.random.multivariate_normal(mu, cov, size=size) + if len(self._shape) == 1: + return samp + return np.reshape(samp, samp.shape[:-1] + (-1, self._shape[1])) diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 5284bdb..18b35ec 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -8,13 +8,18 @@ class KronTerm(Term): - __compat__ = "kronecker" __requires_general_addition__ = True + @property + def dimension(self): + return self.M + def __init__(self, term, *, R): self.term = term self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) self.M = len(self.R) + if self.M < 1: + raise ValueError("At least one 'band' is required") if self.R.ndim != 2 or self.R.shape != (self.M, self.M): raise ValueError( "R must be a square matrix; " @@ -26,13 +31,13 @@ def __len__(self): return len(self.term) * self.M def __add__(self, b): - if b.__compat__ != self.__compat__: - raise TypeError("KronTerms can only be added to other KronTerms") + if self.dimension != b.dimension: + raise 
TypeError("Incompatible term dimensions") return KronTermSum(self, b) def __radd__(self, b): - if b.__compat__ != self.__compat__: - raise TypeError("KronTerms can only be added to other KronTerms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") return KronTermSum(b, self) def __mul__(self, b): @@ -193,6 +198,8 @@ def __init__(self, term, *, alpha): "use a general KronTerm for a full rank model" ) self.M = len(self.alpha) + if self.M < 1: + raise ValueError("At least one 'band' is required") self.R = np.outer(self.alpha, self.alpha) self.alpha2 = self.alpha ** 2 @@ -213,4 +220,6 @@ def _copy_P(self, P0, P): class KronTermSum(TermSumGeneral): - __compat__ = "kronecker" + @property + def dimension(self): + return self.terms[0].M diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index 7d99b92..eafeadb 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -31,12 +31,15 @@ class Term: """ - __compat__ = None __requires_general_addition__ = False + @property + def dimension(self): + return 0 + def __add__(self, b): - if self.__compat__ != b.__compat__: - raise TypeError("Incompatible terms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") if ( self.__requires_general_addition__ or b.__requires_general_addition__ @@ -45,8 +48,8 @@ def __add__(self, b): return TermSum(self, b) def __mul__(self, b): - if self.__compat__ != b.__compat__: - raise TypeError("Incompatible terms") + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") return TermProduct(self, b) def __len__(self): diff --git a/python/test/test_celerite2.py b/python/test/test_celerite2.py index c41f26a..4b130d9 100644 --- a/python/test/test_celerite2.py +++ b/python/test/test_celerite2.py @@ -73,6 +73,9 @@ def test_consistency(oterm, mean, data): dict(return_cov=False, return_var=True), dict(return_cov=True, return_var=False), ]: + print(args) + print(original_gp.predict(y, **args)) + print(gp.predict(y, **args)) assert all( np.allclose(a, b) for a, b in zip( From 256ba4d8246302c35070db17ed0b76148daa77c6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 14:53:02 -0400 Subject: [PATCH 13/29] adding tests for predict and sample --- docs/.gitignore | 1 + python/celerite2/celerite2.py | 65 ++++++++++++++++--------- python/test/test_kron.py | 90 ++++++++++++++++++++++++++++++++--- 3 files changed, 126 insertions(+), 30 deletions(-) diff --git a/docs/.gitignore b/docs/.gitignore index 4bbe113..e727465 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1,3 @@ _build c++ +tutorials/*.png diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 4b86bf7..7735614 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -223,6 +223,9 @@ def _reshape_output(self, y): return y return np.reshape(y, (-1, self._shape[1]) + y.shape[1:]) + def _apply_inverse(self, y): + return driver.solve(self._U, self._P, self._d, self._W, y) + def apply_inverse(self, y, *, inplace=False): """Apply the inverse of the covariance matrix to a vector or matrix @@ -243,9 +246,7 @@ def apply_inverse(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). 
""" y = self._process_input(y, inplace=inplace) - return self._reshape_output( - driver.solve(self._U, self._P, self._d, self._W, y) - ) + return self._reshape_output(self._apply_inverse(y)) def dot_tril(self, y, *, inplace=False): """Dot the Cholesky factor of the GP system into a vector or matrix @@ -358,6 +359,7 @@ def predict( raise ValueError("dimension mismatch") KxsT = None + mu = None if kernel is None: kernel = self.kernel @@ -365,22 +367,36 @@ def predict( mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value - else: - - ( - U_star, - V_star, - inds, - ) = self.kernel.get_conditional_mean_matrices(self._t, xs) - mu = np.empty_like(xs) - mu = driver.conditional_mean( - self._U, self._V, self._P, alpha, U_star, V_star, inds, mu - ) - mu = self._reshape_output(mu) - if include_mean: - mu += self._mean(xs) + elif np.all(np.diff(xs > 0)): + + try: + ( + U_star, + V_star, + inds, + ) = self.kernel.get_conditional_mean_matrices(self._t, xs) + + except NotImplementedError: + # We'll fall back on the slow method below + pass + + else: + mu = np.empty_like(xs) + mu = driver.conditional_mean( + self._U, + self._V, + self._P, + alpha, + U_star, + V_star, + inds, + mu, + ) + mu = self._reshape_output(mu) + if include_mean: + mu += self._mean(xs) - else: + if mu is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: @@ -393,15 +409,15 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = kernel.get_value(0.0) - np.einsum( - "ij,ij->j", KxsT, self.apply_inverse(KxsT, inplace=False) + var = np.diag(kernel.get_value(0.0)) - self._reshape_output( + np.einsum("ij,ij->j", KxsT, self._apply_inverse(np.copy(KxsT))) ) - return mu, self._reshape_output(var) + return mu, var # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= np.tensordot( - KxsT, self.apply_inverse(KxsT, inplace=False), axes=(0, 0) + KxsT, self._apply_inverse(np.copy(KxsT)), axes=(0, 0) ) if len(self._shape) == 1: return mu, cov @@ -435,7 +451,10 @@ def sample(self, *, size=None, include_mean=True): n = np.random.randn(*self._shape) else: n = np.random.randn(*(self._shape + (size,))) - result = np.moveaxis(self.dot_tril(n, inplace=True), -1, 0) + + result = self.dot_tril(n, inplace=True) + if size is not None: + result = np.moveaxis(result, -1, 0) if include_mean: result += self._mean(self._t) return result diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 303eb18..04ea533 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -1,17 +1,90 @@ # -*- coding: utf-8 -*- import numpy as np -from celerite2 import kron, terms +from celerite2 import kron, terms, GaussianProcess -def check_value(term, x, diag, y): - tau = x - x[0] - K = term.get_value(tau[:, None] - tau[None, :]) +def check_value(term, x, diag, y, t): + N, M = diag.shape + + K = term.get_value(x[:, None] - x[None, :]) + + try: + K0 = term.term.get_value(x[:, None] - x[None, :]) + except AttributeError: + pass + else: + assert np.allclose(np.kron(K0, term.R), K) + K[np.diag_indices_from(K)] += diag.flatten() K0 = term.dot(x, diag, np.eye(diag.size)) assert np.allclose(K, K0) assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + gp = GaussianProcess(term, t=x, diag=diag) + + # "log_likelihood" method + yval = y[:, 0].reshape((N, M)) + alpha = np.linalg.solve(K, y[:, 0]) + loglike = -0.5 * ( + np.dot(y[:, 0], alpha) + + np.linalg.slogdet(K)[1] 
+ + len(K) * np.log(2 * np.pi) + ) + assert np.allclose(loglike, gp.log_likelihood(yval)) + + # Predict + K0 = K - np.diag(diag.flatten()) + mu0 = np.dot(K0, alpha) + cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + + mu, var = gp.predict(yval, return_var=True) + _, cov = gp.predict(yval, return_cov=True) + assert np.allclose(mu, mu0.reshape((N, M))) + assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert np.allclose(cov, cov0.reshape((N, M, N, M))) + + mu1, var1 = gp.predict(yval, t=x, return_var=True) + _, cov1 = gp.predict(yval, t=x, return_cov=True) + assert np.allclose(mu, mu1) + assert np.allclose(var, var1) + assert np.allclose(cov, cov1) + + K0 = term.get_value(t[:, None] - x[None, :]) + mu0 = np.dot(K0, alpha) + cov0 = term.get_value(t[:, None] - t[None, :]) - np.dot( + K0, np.linalg.solve(K, K0.T) + ) + mu, var = gp.predict(yval, t=t, return_var=True) + _, cov = gp.predict(yval, t=t, return_cov=True) + assert np.allclose(mu, mu0.reshape((len(t), M))) + assert np.allclose(var, np.diag(cov0).reshape((len(t), M))) + assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) + + # "sample" method + seed = 5938 + np.random.seed(seed) + a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K))) + np.random.seed(seed) + b = gp.sample() + assert np.allclose(a.reshape((N, M)), b) + + np.random.seed(seed) + a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K), 10)) + np.random.seed(seed) + b = gp.sample(size=10) + assert np.allclose( + np.ascontiguousarray(np.moveaxis(a, -1, 0)).reshape((10, N, M)), b + ) + + # "sample_conditional" method, numerics make this one a little unstable; + # just check the shape + b = gp.sample_conditional(yval, t=t) + assert b.shape == (len(t), M) + + b = gp.sample_conditional(yval, t=t, size=10) + assert b.shape == (10, len(t), M) + def test_value(): N = 100 @@ -19,6 +92,7 @@ def test_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -36,7 +110,7 @@ def test_value(): assert V.shape == (N * M, 2 * M) assert P.shape == (N * M - 1, 2 * M) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) def test_low_rank_value(): @@ -45,6 +119,7 @@ def test_low_rank_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -58,7 +133,7 @@ def test_low_rank_value(): assert V.shape == (N * M, 2) assert P.shape == (N * M - 1, 2) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) @@ -70,6 +145,7 @@ def test_sum_value(): np.random.seed(105) x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) diag = np.random.uniform(0.1, 0.5, (N, M)) y = np.random.randn(N * M, 3) @@ -88,4 +164,4 @@ def test_sum_value(): assert V.shape == (N * M, 2 * M + 2) assert P.shape == (N * M - 1, 2 * M + 2) - check_value(term, x, diag, y) + check_value(term, x, diag, y, t) From 83540264f7a8789b78f80d6fe3af73d550bbf8a2 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 15:07:20 -0400 Subject: [PATCH 14/29] fixing isort --- python/test/test_kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 04ea533..15e6a1f 100644 --- a/python/test/test_kron.py 
+++ b/python/test/test_kron.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np -from celerite2 import kron, terms, GaussianProcess +from celerite2 import GaussianProcess, kron, terms def check_value(term, x, diag, y, t): From cbca53018e40a925b3a25f65143760312d11a9ab Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 15:14:06 -0400 Subject: [PATCH 15/29] fixing variance dimensions --- docs/tutorials/first.py | 7 +++++-- python/celerite2/celerite2.py | 4 +++- python/test/theano/test_celerite2.py | 27 --------------------------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/docs/tutorials/first.py b/docs/tutorials/first.py index 9f23710..82ad05c 100644 --- a/docs/tutorials/first.py +++ b/docs/tutorials/first.py @@ -6,7 +6,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.5.2 +# jupytext_version: 1.6.0 # kernelspec: # display_name: Python 3 # language: python @@ -36,7 +36,10 @@ np.random.seed(42) t = np.sort( - np.append(np.random.uniform(0, 3.8, 57), np.random.uniform(5.5, 10, 68),) + np.append( + np.random.uniform(0, 3.8, 57), + np.random.uniform(5.5, 10, 68), + ) ) # The input coordinates must be sorted yerr = np.random.uniform(0.08, 0.22, len(t)) y = ( diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 7735614..357dcba 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -409,7 +409,9 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = np.diag(kernel.get_value(0.0)) - self._reshape_output( + var = np.squeeze( + np.diag(kernel.get_value(0.0)) + ) - self._reshape_output( np.einsum("ij,ij->j", KxsT, self._apply_inverse(np.copy(KxsT))) ) return mu, var diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index a9b9ddb..7ef1aed 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -55,33 +55,6 @@ def test_consistency(name, args, mean): check_gp_models(lambda x: x.eval(), gp, pygp, y, t) - # # "log_likelihood" method - # assert np.allclose(pygp.log_likelihood(y), gp.log_likelihood(y).eval()) - - # # "predict" method - # for args in [ - # dict(return_cov=False, return_var=False), - # dict(return_cov=False, return_var=True), - # dict(return_cov=True, return_var=False), - # ]: - # assert all( - # np.allclose(a, b) - # for a, b in zip( - # pygp.predict(y, **args), - # theano.function([], gp.predict(y, **args))(), - # ) - # ) - # assert all( - # np.allclose(a, b) - # for a, b in zip( - # pygp.predict(y, t=t, **args), - # theano.function([], gp.predict(y, t=t, **args))(), - # ) - # ) - - # # "dot_tril" method - # assert np.allclose(pygp.dot_tril(y), gp.dot_tril(y).eval()) - def test_errors(): # Generate fake data From 1d38e1c8add5eff33dad20402950e8ec8367ed7f Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Fri, 16 Oct 2020 16:04:13 -0400 Subject: [PATCH 16/29] typo --- python/celerite2/celerite2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 357dcba..8675460 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -367,7 +367,7 @@ def predict( mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value - elif np.all(np.diff(xs > 0)): + elif np.all(np.diff(xs) > 0): try: ( From 5cb6d3ad53c8c6d1ac9aba19254a5ccddbe51641 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 16:02:22 
-0400 Subject: [PATCH 17/29] starting to implement theano terms --- python/celerite2/__init__.py | 4 +- python/celerite2/theano/__init__.py | 4 +- python/celerite2/theano/kron.py | 141 ++++++++++++++++++++++++++++ python/celerite2/theano/terms.py | 110 +++++++++++++++++++--- python/test/theano/test_kron.py | 61 ++++++++++++ python/test/theano/test_terms.py | 1 - 6 files changed, 302 insertions(+), 19 deletions(-) create mode 100644 python/celerite2/theano/kron.py create mode 100644 python/test/theano/test_kron.py diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index d015d8e..d9e6d14 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- -__all__ = ["__version__", "terms", "GaussianProcess"] +__all__ = ["__version__", "terms", "kron", "GaussianProcess"] -from . import terms +from . import terms, kron from .celerite2 import GaussianProcess from .celerite2_version import __version__ diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py index 19b104a..905f38c 100644 --- a/python/celerite2/theano/__init__.py +++ b/python/celerite2/theano/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -__all__ = ["terms", "GaussianProcess", "CeleriteNormal"] +__all__ = ["terms", "kron", "GaussianProcess", "CeleriteNormal"] -from . import terms +from . import terms, kron from .celerite2 import GaussianProcess from .distribution import CeleriteNormal diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py new file mode 100644 index 0000000..cc9d54a --- /dev/null +++ b/python/celerite2/theano/kron.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] + +import theano.tensor as tt +from theano.tensor.slinalg import kron + +from .terms import Term, TermSumGeneral +from .. 
import kron as base_kron + + +class KronTerm(Term): + __requires_general_addition__ = True + __doc__ = base_kron.KronTerm.__doc__ + + @property + def dimension(self): + return self.M + + def __init__(self, term, *, R): + self.term = term + self.R = tt.as_tensor_variable(R) + self.M = self.R.shape[0] + if self.R.ndim != 2: + raise ValueError( + "R must be a square matrix; " + "use a LowRankKronTerm for the low rank model" + ) + self.alpha2 = tt.diag(self.R) + + def __add__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") + return KronTermSum(self, b) + + def __radd__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") + return KronTermSum(b, self) + + def __mul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def __rmul__(self, b): + raise NotImplementedError( + "multiplication is not implemented for KronTerm" + ) + + def get_coefficients(self): + raise ValueError( + "KronTerm objects don't provide coefficients and they can't be " + "used in operations with other celerite2 terms" + ) + + def get_value(self, tau): + return kron(self.term.get_value(tau), self.R) + + def to_dense(self, x, diag): + K = self.get_value(x[:, None] - x[None, :]) + K += tt.diag(tt.reshape(diag, (-1,))) + return K + + def get_psd(self, omega): + return self.term.get_psd(omega) + + def get_celerite_matrices(self, x, diag): + # Check the input dimensions + x = tt.as_tensor_variable(x) + diag = tt.as_tensor_variable(diag) + if diag.ndim != 2: + raise ValueError("'diag' must have the shape (N, M)") + + # Compute the coefficients and kernel for the celerite subproblem + ar, cr, ac, _, cc, _ = self.term.get_coefficients() + _, U_sub, V_sub, _ = self.term.get_celerite_matrices( + x, tt.zeros_like(x) + ) + + # Expand the times appropriately + x_full = tt.reshape(tt.tile(x[:, None], (1, self.M)), (-1,)) + dx = x_full[1:] - x_full[:-1] + + a = tt.reshape( + diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) + ) + U = kron(U_sub, self._get_U_kron()) + V = kron(V_sub, self._get_V_kron()) + + c = tt.concatenate((cr, cc, cc)) + P0 = tt.exp(-c[None, :] * dx[:, None]) + P = self._copy_P(P0) + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + raise NotImplementedError( + "Conditional mean matrices have not (yet!) 
been implemented" + ) + + # The following should be implemented by subclasses + def _get_U_kron(self): + return self.R + + def _get_V_kron(self): + return tt.eye(self.M) + + def _copy_P(self, P0): + return tt.tile(P0[:, :, None], (1, 1, self.M)).reshape( + (P0.shape[0], -1) + ) + + +class LowRankKronTerm(KronTerm): + def __init__(self, term, *, alpha): + self.term = term + self.alpha = tt.as_tensor_variable(alpha) + if self.alpha.ndim != 1: + raise ValueError( + "alpha must be a vector; " + "use a general KronTerm for a full rank model" + ) + self.M = self.alpha.shape[0] + self.R = self.alpha[None, :] * self.alpha[:, None] + self.alpha2 = self.alpha ** 2 + + def _get_U_kron(self): + return self.alpha[:, None] + + def _get_V_kron(self): + return self.alpha[:, None] + + def _copy_P(self, P0): + return P0 + + +class KronTermSum(TermSumGeneral): + @property + def dimension(self): + return self.terms[0].M diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 67336be..f49b95f 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -3,6 +3,7 @@ __all__ = [ "Term", "TermSum", + "TermSumGeneral", "TermProduct", "TermDiff", "TermConvolution", @@ -28,11 +29,23 @@ def __init__(self, *, dtype="float64"): self.dtype = dtype self.coefficients = self.get_coefficients() + def __len__(self): + raise TypeError("len is not implemented for Theano terms") + def __add__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") dtype = theano.scalar.upcast(self.dtype, b.dtype) + if ( + self.__requires_general_addition__ + or b.__requires_general_addition__ + ): + return TermSumGeneral(self, b, dtype=dtype) return TermSum(self, b, dtype=dtype) def __mul__(self, b): + if self.dimension != b.dimension: + raise TypeError("Incompatible term dimensions") dtype = theano.scalar.upcast(self.dtype, b.dtype) return TermProduct(self, b, dtype=dtype) @@ -127,11 +140,10 @@ class TermSum(Term): __doc__ = base_terms.TermSum.__doc__ def __init__(self, *terms, **kwargs): - if any(isinstance(term, TermConvolution) for term in terms): + if any(term.__requires_general_addition__ for term in terms): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self._terms = terms super().__init__(**kwargs) @@ -145,17 +157,87 @@ def get_coefficients(self): return tuple(tt.concatenate(a, axis=0) for a in zip(*coeffs)) +class TermSumGeneral(Term): + __doc__ = base_terms.TermSumGeneral.__doc__ + __requires_general_addition__ = True + + def __init__(self, *terms): + basic = [ + term for term in terms if not term.__requires_general_addition__ + ] + self._terms = [ + term for term in terms if term.__requires_general_addition__ + ] + if len(basic): + self._terms.insert(0, TermSum(*basic)) + if not len(self._terms): + raise ValueError( + "A general term sum cannot be instantiated without any terms" + ) + + @property + def terms(self): + return self._terms + + # def __len__(self): + # return sum(map(len, self.terms)) + + def get_value(self, tau): + K = self.terms[0].get_value(tau) + for term in self.terms[1:]: + K += term.get_value(tau) + return K + + def get_psd(self, omega): + p = self.terms[0].get_psd(omega) + for term in self.terms[1:]: + p += term.get_psd(omega) + return p + + def get_celerite_matrices(self, x, diag): + x = tt.as_tensor_variable(x) + diag = 
tt.as_tensor_variable(diag) + zeros = tt.zeros_like(diag) + + a = [] + U = [] + V = [] + P = [] + for term in self.terms: + args = term.get_celerite_matrices(x, zeros) + a.append(args[0]) + U.append(args[1]) + V.append(args[2]) + P.append(args[3]) + + a = tt.reshape(diag, -1) + tt.sum(a, axis=0) + U = tt.concatenate(U, axis=1) + V = tt.concatenate(V, axis=1) + P = tt.concatenate(P, axis=1) + + return a, U, V, P + + def get_conditional_mean_matrices(self, x, t): + Us = [] + Vs = [] + for term in self.terms: + U, V, inds = term.get_conditional_mean_matrices(x, t) + Us.append(U) + Vs.append(V) + return tt.concatenate(Us, axis=1), tt.concatenate(Vs, axis=1), inds + + class TermProduct(Term): __doc__ = base_terms.TermProduct.__doc__ def __init__(self, term1, term2, **kwargs): - int1 = isinstance(term1, TermConvolution) - int2 = isinstance(term2, TermConvolution) - if int1 or int2: + if ( + term1.__requires_general_addition__ + or term2.__requires_general_addition__ + ): raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term1 = term1 self.term2 = term2 @@ -230,11 +312,10 @@ class TermDiff(Term): __doc__ = base_terms.TermDiff.__doc__ def __init__(self, term, **kwargs): - if isinstance(term, TermConvolution): + if term.__requires_general_addition__: raise TypeError( - "You cannot perform operations on an " - "TermConvolution, it must be the outer term in " - "the kernel" + "You cannot perform operations on a term that requires general" + " addition, it must be the outer term in the kernel" ) self.term = term super().__init__(**kwargs) @@ -254,6 +335,7 @@ def get_coefficients(self): class TermConvolution(Term): + __requires_general_addition__ = True __doc__ = base_terms.TermConvolution.__doc__ def __init__(self, term, delta, **kwargs): diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py new file mode 100644 index 0000000..29d35a3 --- /dev/null +++ b/python/test/theano/test_kron.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +import numpy as np +import theano +import theano.tensor as tt + +from celerite2.theano import terms, kron + + +def test_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2 * M) + assert V.eval().shape == (N * M, 2 * M) + assert P.eval().shape == (N * M - 1, 2 * M) + + # check_value(term, x, diag, y, t) + + +def test_low_rank_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2) + assert V.eval().shape == (N * M, 2) + 
assert P.eval().shape == (N * M - 1, 2) + + # check_value(term, x, diag, y, t) + + full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) + assert np.allclose( + full_term.dot(x, diag, y).eval(), term.dot(x, diag, y).eval() + ) diff --git a/python/test/theano/test_terms.py b/python/test/theano/test_terms.py index ebe4f50..1fbcb8a 100644 --- a/python/test/theano/test_terms.py +++ b/python/test/theano/test_terms.py @@ -24,7 +24,6 @@ compare_terms = partial(check_tensor_term, lambda x: x.eval()) -@pytest.mark.xfail(reason="TermSumGeneral not yet implemented") def test_complete_implementation(): x = np.linspace(-10, 10, 500) for name in pyterms.__all__: From b26bbf2237a223202a87ec9e306fa35e87ef3102 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:03:11 -0400 Subject: [PATCH 18/29] updating theano gp for kron --- python/celerite2/celerite2.py | 1 + python/celerite2/ext.py | 87 ++++++++++++++++++-------- python/celerite2/theano/celerite2.py | 14 +++-- python/celerite2/theano/kron.py | 4 -- python/celerite2/theano/terms.py | 2 +- python/test/theano/test_kron.py | 92 +++++++++++++++++++++++++++- 6 files changed, 163 insertions(+), 37 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index 8675460..b8474de 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -39,6 +39,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._mean_value = None self._diag = None self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 45a2b84..790e340 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -129,6 +129,8 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf @@ -138,7 +140,13 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): raise NotImplementedError("must be implemented by subclasses") - def zeros_like(self, tensor): + def zeros(self, shape): + raise NotImplementedError("must be implemented by subclasses") + + def reshape(self, y, shape): + raise NotImplementedError("must be implemented by subclasses") + + def diag_squeeze(self, y): raise NotImplementedError("must be implemented by subclasses") def do_compute(self, quiet): @@ -177,7 +185,12 @@ def compute( # Save the diagonal self._t = self.as_tensor(t) self._mean_value = self._mean(self._t) - self._diag = self.zeros_like(self._t) + if self.kernel.dimension == 0: + self._shape = (self._t.size,) + else: + self._shape = (self._t.size, self.kernel.dimension) + self._diag = self.zeros(self._shape) + self._size = self._diag.size if yerr is not None: if diag is not None: raise ValueError( @@ -202,21 +215,30 @@ def _process_input(self, y, *, require_vector=False): if self._t is None: raise RuntimeError("you must call 'compute' first") y = self.as_tensor(y) + if self.kernel.dimension != 0: + y = self.reshape( + y, (self._size,) + tuple(y.shape[2:]), ndim=y.ndim - 1 + ) if require_vector and y.ndim != 1: raise ValueError("'y' must be one dimensional") return y + def _reshape_output(self, y): + if self.kernel.dimension == 0: + return y + return self.reshape(y, (-1, self._shape[1]) + tuple(y.shape[1:])) + def apply_inverse(self, y): y = self._process_input(y) - return self.do_solve(y) + return self._reshape_output(self.do_solve(y)) def dot_tril(self, y): y = 
self._process_input(y) - return self.do_dot_tril(y) + return self._reshape_output(self.do_dot_tril(y)) def log_likelihood(self, y): - y = self._process_input(y, require_vector=True) - return self._norm - 0.5 * self.do_norm(y - self._mean_value) + y = self._process_input(y - self.mean_value, require_vector=True) + return self._norm - 0.5 * self.do_norm(y) def predict( self, @@ -229,9 +251,9 @@ def predict( kernel=None, _fast_mean=True, ): - y = self._process_input(y, require_vector=True) - resid = y - self._mean_value - alpha = self.do_solve(resid) + mean_value = self.mean_value + y = self._process_input(y - mean_value, require_vector=True) + alpha = self.do_solve(y) if t is None: xs = self._t @@ -248,26 +270,39 @@ def predict( if t is None: if include_mean: - mu = y - self._diag * alpha + mu = ( + self._reshape_output( + y - self.reshape(self._diag, (-1,)) * alpha + ) + + mean_value + ) else: - mu = resid - self._diag * alpha + mu = self._reshape_output( + y - self.reshape(self._diag, (-1,)) * alpha + ) elif _fast_mean: - ( - U_star, - V_star, - inds, - ) = self.kernel.get_conditional_mean_matrices(self._t, xs) - mu = self.do_conditional_mean(alpha, U_star, V_star, inds) + try: + ( + U_star, + V_star, + inds, + ) = self.kernel.get_conditional_mean_matrices(self._t, xs) + except NotImplementedError: + pass + else: + mu = self._reshape_output( + self.do_conditional_mean(alpha, U_star, V_star, inds) + ) - if include_mean: - mu += self._mean(xs) + if include_mean: + mu += self._mean(xs) if mu is None: if kernel is None: kernel = self.kernel KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) - mu = self.tensordot(KxsT, alpha) + mu = self._reshape_output(self.tensordot(KxsT, alpha)) if include_mean: mu += self._mean(xs) @@ -278,15 +313,19 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = kernel.get_value(0.0) - self.diagdot( - KxsT, self.do_solve(KxsT) - ) + var = self.diag_squeeze( + kernel.get_value(self.zeros((1, 1))) + ) - self._reshape_output(self.diagdot(KxsT, self.do_solve(KxsT))) return mu, var # Predictive covariance cov = kernel.get_value(xs[:, None] - xs[None, :]) cov -= self.tensordot(KxsT, self.do_solve(KxsT)) - return mu, cov + + if self.kernel.dimension == 0: + return mu, cov + + return mu, self.reshape(cov, tuple(mu.shape) + tuple(mu.shape)) def sample(self, *args, **kwargs): raise NotImplementedError("'sample' is not implemented by extensions") diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index 5fd8a5f..b3a6bc5 100644 --- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -51,8 +51,14 @@ class GaussianProcess(BaseGaussianProcess): def as_tensor(self, tensor): return tt.as_tensor_variable(tensor).astype("float64") - def zeros_like(self, tensor): - return tt.zeros_like(tensor) + def zeros(self, shape): + return tt.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return tt.reshape(y, shape, ndim=ndim) + + def diag_squeeze(self, y): + return tt.squeeze(tt.diag(y)) def do_compute(self, quiet): if quiet: @@ -69,9 +75,7 @@ def do_compute(self, quiet): ) self._log_det = tt.sum(tt.log(self._d)) - self._norm = -0.5 * ( - self._log_det + self._t.shape[0] * np.log(2 * np.pi) - ) + self._norm = -0.5 * (self._log_det + self._size * np.log(2 * np.pi)) def check_sorted(self, t): return tt.opt.Assert()(t, tt.all(t[1:] - t[:-1] >= 0)) diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index cc9d54a..c5c6d9c 100644 
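The `get_celerite_matrices` implementations above (both the NumPy and the Theano versions) lean on the Kronecker mixed-product rule: because `kron(A, B) @ kron(C, D) == kron(A @ C, B @ D)`, expanding the one-dimensional low-rank factors as `U -> kron(U_sub, R)` and `V -> kron(V_sub, I_M)` (or both with `alpha[:, None]` in the low-rank term) reproduces the Kronecker kernel `kron(K_time, R)` while keeping the semiseparable structure that the celerite solver needs. A minimal numerical check of that identity (a sketch, not part of the patches; the shapes are arbitrary):

    import numpy as np

    np.random.seed(42)
    N, J, M = 6, 2, 3
    U_sub = np.random.randn(N, J)   # low-rank factors of the 1D celerite kernel
    V_sub = np.random.randn(N, J)
    R = np.random.randn(M, M)
    R = R @ R.T                     # any symmetric output-to-output covariance

    lhs = np.kron(U_sub, R) @ np.kron(V_sub, np.eye(M)).T
    rhs = np.kron(U_sub @ V_sub.T, R)
    assert np.allclose(lhs, rhs)

Only the triangular parts of these products enter the actual factorization, and the exponential decay factors do not depend on the output index, which is why the columns of `P0` are simply repeated for the extra columns of `P`.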
--- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -29,13 +29,9 @@ def __init__(self, term, *, R): self.alpha2 = tt.diag(self.R) def __add__(self, b): - if self.dimension != b.dimension: - raise TypeError("Incompatible term dimensions") return KronTermSum(self, b) def __radd__(self, b): - if self.dimension != b.dimension: - raise TypeError("Incompatible term dimensions") return KronTermSum(b, self) def __mul__(self, b): diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index f49b95f..4833b1e 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -210,7 +210,7 @@ def get_celerite_matrices(self, x, diag): V.append(args[2]) P.append(args[3]) - a = tt.reshape(diag, -1) + tt.sum(a, axis=0) + a = tt.reshape(diag, (-1,)) + tt.sum(a, axis=0) U = tt.concatenate(U, axis=1) V = tt.concatenate(V, axis=1) P = tt.concatenate(P, axis=1) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 29d35a3..e851973 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -3,7 +3,65 @@ import theano import theano.tensor as tt -from celerite2.theano import terms, kron +from celerite2.theano import terms, kron, GaussianProcess + + +def check_value(term, x, diag, y, t): + N, M = diag.shape + + K = term.get_value(x[:, None] - x[None, :]).eval() + + try: + K0 = term.term.get_value(x[:, None] - x[None, :]).eval() + except AttributeError: + pass + else: + assert np.allclose(np.kron(K0, term.R.eval()), K) + + K += np.diag(diag.flatten()) + K0 = term.dot(x, diag, np.eye(diag.size)).eval() + assert np.allclose(K, K0) + assert np.allclose(np.dot(K, y), term.dot(x, diag, y).eval()) + + gp = GaussianProcess(term, t=x, diag=diag) + + # "log_likelihood" method + yval = y[:, 0].reshape((N, M)) + alpha = np.linalg.solve(K, y[:, 0]) + loglike = -0.5 * ( + np.dot(y[:, 0], alpha) + + np.linalg.slogdet(K)[1] + + len(K) * np.log(2 * np.pi) + ) + assert np.allclose(loglike, gp.log_likelihood(yval).eval()) + + # Predict + K0 = K - np.diag(diag.flatten()) + mu0 = np.dot(K0, alpha) + cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + + mu, var = theano.function([], gp.predict(yval, return_var=True))() + _, cov = theano.function([], gp.predict(yval, return_cov=True))() + assert np.allclose(mu, mu0.reshape((N, M))) + assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert np.allclose(cov, cov0.reshape((N, M, N, M))) + + mu1, var1 = theano.function([], gp.predict(yval, t=x, return_var=True))() + _, cov1 = theano.function([], gp.predict(yval, t=x, return_cov=True))() + assert np.allclose(mu, mu1) + assert np.allclose(var, var1) + assert np.allclose(cov, cov1) + + K0 = term.get_value(t[:, None] - x[None, :]).eval() + mu0 = np.dot(K0, alpha) + cov0 = term.get_value(t[:, None] - t[None, :]).eval() - np.dot( + K0, np.linalg.solve(K, K0.T) + ) + mu, var = theano.function([], gp.predict(yval, t=t, return_var=True))() + _, cov = theano.function([], gp.predict(yval, t=t, return_cov=True))() + assert np.allclose(mu, mu0.reshape((len(t), M))) + assert np.allclose(var, np.diag(cov0).reshape((len(t), M))) + assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) def test_value(): @@ -30,7 +88,7 @@ def test_value(): assert V.eval().shape == (N * M, 2 * M) assert P.eval().shape == (N * M - 1, 2 * M) - # check_value(term, x, diag, y, t) + check_value(term, x, diag, y, t) def test_low_rank_value(): @@ -53,9 +111,37 @@ def test_low_rank_value(): assert V.eval().shape == (N * M, 2) assert 
P.eval().shape == (N * M - 1, 2) - # check_value(term, x, diag, y, t) + check_value(term, x, diag, y, t) full_term = kron.KronTerm(term0, R=np.outer(alpha, alpha)) assert np.allclose( full_term.dot(x, diag, y).eval(), term.dot(x, diag, y).eval() ) + + +def test_sum_value(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + alpha = np.random.randn(M) + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + + a, U, V, P = term.get_celerite_matrices(x, diag) + assert a.eval().shape == (N * M,) + assert U.eval().shape == (N * M, 2 * M + 2) + assert V.eval().shape == (N * M, 2 * M + 2) + assert P.eval().shape == (N * M - 1, 2 * M + 2) + + check_value(term, x, diag, y, t) From bb88ce28f26e9afaa1d9278c45822ff129522dab Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:05:33 -0400 Subject: [PATCH 19/29] updating jax an torch --- python/celerite2/jax/celerite2.py | 12 +++++++++--- python/celerite2/torch/celerite2.py | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/python/celerite2/jax/celerite2.py b/python/celerite2/jax/celerite2.py index 1dbbe54..d5aff69 100644 --- a/python/celerite2/jax/celerite2.py +++ b/python/celerite2/jax/celerite2.py @@ -12,8 +12,14 @@ class GaussianProcess(BaseGaussianProcess): def as_tensor(self, tensor): return np.asarray(tensor, dtype=np.float64) - def zeros_like(self, tensor): - return np.zeros_like(tensor) + def zeros(self, shape): + return np.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return np.reshape(y, shape) + + def diag_squeeze(self, y): + return np.squeeze(np.diag(y)) def do_compute(self, quiet): # Compute the Cholesky factorization @@ -29,7 +35,7 @@ def do_compute(self, quiet): else: self._log_det = np.sum(np.log(self._d)) self._norm = -0.5 * ( - self._log_det + len(self._t) * np.log(2 * np.pi) + self._log_det + self._size * np.log(2 * np.pi) ) def check_sorted(self, t): diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index d7b7e85..fb5f2da 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -29,8 +29,14 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): return as_tensor(tensor) - def zeros_like(self, tensor): - return torch.zeros_like(tensor) + def zeros(self, shape): + return torch.zeros(shape) + + def reshape(self, y, shape, ndim=None): + return torch.reshape(y, shape) + + def diag_squeeze(self, y): + return torch.squeeze(torch.diag(y)) def do_compute(self, quiet): # Compute the Cholesky factorization From 1430f46ce3c402a8321a49ed1c0a3e49d4e9b0b8 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 18:06:09 -0400 Subject: [PATCH 20/29] sorting imports --- python/celerite2/__init__.py | 2 +- python/celerite2/theano/__init__.py | 2 +- python/celerite2/theano/kron.py | 5 ++--- python/test/theano/test_kron.py | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/celerite2/__init__.py b/python/celerite2/__init__.py index d9e6d14..e5db30d 100644 --- a/python/celerite2/__init__.py +++ b/python/celerite2/__init__.py @@ -2,7 +2,7 @@ __all__ = ["__version__", "terms", "kron", 
"GaussianProcess"] -from . import terms, kron +from . import kron, terms from .celerite2 import GaussianProcess from .celerite2_version import __version__ diff --git a/python/celerite2/theano/__init__.py b/python/celerite2/theano/__init__.py index 905f38c..38b1142 100644 --- a/python/celerite2/theano/__init__.py +++ b/python/celerite2/theano/__init__.py @@ -2,6 +2,6 @@ __all__ = ["terms", "kron", "GaussianProcess", "CeleriteNormal"] -from . import terms, kron +from . import kron, terms from .celerite2 import GaussianProcess from .distribution import CeleriteNormal diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index c5c6d9c..4f3b622 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- __all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] - -import theano.tensor as tt +from theano import tensor as tt from theano.tensor.slinalg import kron -from .terms import Term, TermSumGeneral from .. import kron as base_kron +from .terms import Term, TermSumGeneral class KronTerm(Term): diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index e851973..1c8d3e0 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- import numpy as np import theano -import theano.tensor as tt +from theano import tensor as tt -from celerite2.theano import terms, kron, GaussianProcess +from celerite2.theano import GaussianProcess, kron, terms def check_value(term, x, diag, y, t): From 9a776204d62eb47c6896d544da1866e6ac9cb3c6 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 19:25:44 -0400 Subject: [PATCH 21/29] typos --- python/celerite2/ext.py | 10 +++++++--- python/celerite2/jax/terms.py | 4 ++++ python/celerite2/torch/terms.py | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 790e340..0158674 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -313,9 +313,13 @@ def predict( if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) if return_var: - var = self.diag_squeeze( - kernel.get_value(self.zeros((1, 1))) - ) - self._reshape_output(self.diagdot(KxsT, self.do_solve(KxsT))) + if self.kernel.dimension == 0: + var0 = kernel.get_value(0.0) + else: + var0 = self.diag_squeeze(kernel.get_value(self.zeros((1, 1)))) + var = var0 - self._reshape_output( + self.diagdot(KxsT, self.do_solve(KxsT)) + ) return mu, var # Predictive covariance diff --git a/python/celerite2/jax/terms.py b/python/celerite2/jax/terms.py index 10d5884..ab887d7 100644 --- a/python/celerite2/jax/terms.py +++ b/python/celerite2/jax/terms.py @@ -27,6 +27,10 @@ def __add__(self, b): def __mul__(self, b): return TermProduct(self, b) + @property + def dimension(self): + return 0 + @property def terms(self): return [self] diff --git a/python/celerite2/torch/terms.py b/python/celerite2/torch/terms.py index 8de426d..a523e73 100644 --- a/python/celerite2/torch/terms.py +++ b/python/celerite2/torch/terms.py @@ -33,6 +33,10 @@ def __add__(self, b): def __mul__(self, b): return TermProduct(self, b) + @property + def dimension(self): + return 0 + @property def terms(self): return [self] From 4d6d3ca1b25515cd6f7d57f2f7738a14d4b6b928 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 19:41:10 -0400 Subject: [PATCH 22/29] torch sizes --- python/celerite2/ext.py | 9 ++++++--- python/celerite2/torch/celerite2.py | 14 ++++++++++++++ 
python/test/theano/test_kron.py | 1 - 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/celerite2/ext.py b/python/celerite2/ext.py index 0158674..93a06b8 100644 --- a/python/celerite2/ext.py +++ b/python/celerite2/ext.py @@ -140,6 +140,9 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): def as_tensor(self, tensor): raise NotImplementedError("must be implemented by subclasses") + def get_size(self, tensor): + return tensor.size + def zeros(self, shape): raise NotImplementedError("must be implemented by subclasses") @@ -186,11 +189,11 @@ def compute( self._t = self.as_tensor(t) self._mean_value = self._mean(self._t) if self.kernel.dimension == 0: - self._shape = (self._t.size,) + self._shape = (self.get_size(self._t),) else: - self._shape = (self._t.size, self.kernel.dimension) + self._shape = (self.get_size(self._t), self.kernel.dimension) self._diag = self.zeros(self._shape) - self._size = self._diag.size + self._size = self.get_size(self._diag) if yerr is not None: if diag is not None: raise ValueError( diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index fb5f2da..8b7e805 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -19,16 +19,30 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): # Placeholders for storing data self._t = None + self._mean_value = None self._diag = None + self._size = None + self._shape = None self._log_det = -np.inf self._norm = np.inf if t is not None: self.compute(t, **kwargs) + @property + def mean_value(self): + if self._mean_value is None: + raise RuntimeError( + "'compute' must be executed before accessing mean_value" + ) + return self._mean_value + def as_tensor(self, tensor): return as_tensor(tensor) + def get_size(self, tensor): + return torch.prod(tensor.size()) + def zeros(self, shape): return torch.zeros(shape) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 1c8d3e0..d8e6f5d 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import numpy as np import theano -from theano import tensor as tt from celerite2.theano import GaussianProcess, kron, terms From dd91eb9724e5e660d818a7ca6628349802614ade Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Oct 2020 20:08:55 -0400 Subject: [PATCH 23/29] torch size again --- python/celerite2/torch/celerite2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/celerite2/torch/celerite2.py b/python/celerite2/torch/celerite2.py index 8b7e805..19c7273 100644 --- a/python/celerite2/torch/celerite2.py +++ b/python/celerite2/torch/celerite2.py @@ -41,7 +41,7 @@ def as_tensor(self, tensor): return as_tensor(tensor) def get_size(self, tensor): - return torch.prod(tensor.size()) + return torch.numel(tensor) def zeros(self, shape): return torch.zeros(shape) From badd7292143eb03596f1c1e220408c687f28d802 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 09:09:31 -0400 Subject: [PATCH 24/29] adding tests for citations --- python/celerite2/theano/celerite2.py | 2 + python/celerite2/theano/kron.py | 26 ++++++++++++ python/celerite2/theano/terms.py | 1 + python/test/theano/test_celerite2.py | 63 +++++++++++++++++----------- python/test/theano/test_kron.py | 59 ++++++++++++++------------ setup.py | 2 +- 6 files changed, 100 insertions(+), 53 deletions(-) diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index b3a6bc5..aad68a0 100644 
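The diff that follows threads the kernel's `__citations__` tuple through `GaussianProcess.marginal` and onto the active PyMC3 model. As a usage sketch (not part of the patch; it just mirrors the `test_citations` tests added below):

    import numpy as np
    import pymc3 as pm
    from celerite2.theano import GaussianProcess, kron, terms

    N, M = 50, 3
    x = np.sort(np.random.uniform(0, 10, N))
    diag = np.random.uniform(0.1, 0.5, (N, M))
    y = np.random.randn(N, M)

    with pm.Model() as model:
        term = kron.KronTerm(terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0), R=np.eye(M))
        gp = GaussianProcess(term, t=x, diag=diag)
        gp.marginal("obs", observed=y)

    # After `marginal`, the BibTeX entries are attached to the model:
    # model.__citations__["celerite2"] for the solver itself and, for Kron
    # terms, model.__citations__["celerite2:kernel"] with the Gordon et al.
    # (2020) reference.

This is also why `setup.py` now lists `pymc3` under the `theano` extra.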
--- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -106,6 +106,8 @@ def _add_citations_to_pymc3_model(self, **kwargs): if not hasattr(model, "__citations__"): model.__citations__ = dict() model.__citations__["celerite2"] = CITATIONS + if self.kernel.__citations__: + model.__citations__["celerite2:kernel"] = self.kernel.__citations__ def marginal(self, name, **kwargs): """Add the marginal likelihood to a PyMC3 model diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 4f3b622..a5c6d01 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -7,10 +7,34 @@ from .. import kron as base_kron from .terms import Term, TermSumGeneral +CITATIONS = ( + ("celerite2:gordon20",), + r""" +@article{celerite2:gordon20, + author = {{Gordon}, Tyler and {Agol}, Eric and + {Foreman-Mackey}, Daniel}, + title = "{A Fast, 2D Gaussian Process Method Based on Celerite: + Applications to Transiting Exoplanet Discovery and + Characterization}", + journal = {arXiv e-prints}, + year = 2020, + month = jul, + eid = {arXiv:2007.05799}, + pages = {arXiv:2007.05799}, +archivePrefix = {arXiv}, + eprint = {2007.05799}, + primaryClass = {astro-ph.IM}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2020arXiv200705799G}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} +""", +) + class KronTerm(Term): __requires_general_addition__ = True __doc__ = base_kron.KronTerm.__doc__ + __citations__ = CITATIONS @property def dimension(self): @@ -131,6 +155,8 @@ def _copy_P(self, P0): class KronTermSum(TermSumGeneral): + __citations__ = CITATIONS + @property def dimension(self): return self.terms[0].M diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 4833b1e..90d7a8e 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -24,6 +24,7 @@ class Term(base_terms.Term): __doc__ = base_terms.Term.__doc__ + __citations__ = None def __init__(self, *, dtype="float64"): self.dtype = dtype diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index 7ef1aed..b9caa65 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -1,25 +1,13 @@ # -*- coding: utf-8 -*- import numpy as np +import pymc3 as pm import pytest import celerite2 from celerite2 import terms as pyterms from celerite2.testing import check_gp_models - -try: - import theano # noqa -except ImportError: - HAS_THEANO = False -else: - from celerite2.theano import GaussianProcess, terms - - HAS_THEANO = True - - -pytestmark = pytest.mark.skipif( - not HAS_THEANO, reason="Theano is not installed" -) - +from celerite2.theano import GaussianProcess, terms +from celerite2.theano.celerite2 import CITATIONS term_mark = pytest.mark.parametrize( "name,args", @@ -35,15 +23,21 @@ ) -@term_mark -@pytest.mark.parametrize("mean", [0.0, 10.5]) -def test_consistency(name, args, mean): +@pytest.fixture +def data(): # Generate fake data np.random.seed(40582) x = np.sort(np.random.uniform(0, 10, 50)) t = np.sort(np.random.uniform(-1, 12, 100)) diag = np.random.uniform(0.1, 0.3, len(x)) y = np.sin(x) + return x, diag, y, t + + +@term_mark +@pytest.mark.parametrize("mean", [0.0, 10.5]) +def test_consistency(name, args, mean, data): + x, diag, y, t = data term = getattr(terms, name)(**args) gp = GaussianProcess(term, mean=mean) @@ -56,13 +50,8 @@ def test_consistency(name, args, mean): check_gp_models(lambda x: x.eval(), gp, pygp, y, t) -def test_errors(): - # 
Generate fake data - np.random.seed(40582) - x = np.sort(np.random.uniform(0, 10, 50)) - t = np.sort(np.random.uniform(-1, 12, 100)) - diag = np.random.uniform(0.1, 0.3, len(x)) - y = np.sin(x) +def test_errors(data): + x, diag, y, t = data term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) gp = GaussianProcess(term) @@ -108,3 +97,27 @@ def test_errors(): with pytest.raises(ValueError): gp.predict(y, t=np.tile(t[:, None], (1, 5))) + + +def test_marginal(data): + x, diag, y, t = data + + with pm.Model() as model: + term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y) + + assert np.allclose( + model.fastfn(model.logpt)(model.test_point), + model.fastfn(gp.log_likelihood(y))(model.test_point), + ) + + +def test_citations(data): + x, diag, y, t = data + + with pm.Model() as model: + term = terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y) + assert model.__citations__["celerite2"] == CITATIONS diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index d8e6f5d..9a98d3e 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -1,10 +1,26 @@ # -*- coding: utf-8 -*- import numpy as np +import pymc3 as pm +import pytest import theano from celerite2.theano import GaussianProcess, kron, terms +@pytest.fixture +def data(): + N = 100 + M = 5 + + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + + return N, M, x, diag, y, t + + def check_value(term, x, diag, y, t): N, M = diag.shape @@ -63,15 +79,8 @@ def check_value(term, x, diag, y, t): assert np.allclose(cov, cov0.reshape((len(t), M, len(t), M))) -def test_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_value(data): + N, M, x, diag, y, t = data R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -90,15 +99,8 @@ def test_value(): check_value(term, x, diag, y, t) -def test_low_rank_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_low_rank_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) @@ -118,15 +120,8 @@ def test_low_rank_value(): ) -def test_sum_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_sum_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) R = np.random.randn(M, M) @@ -144,3 +139,13 @@ def test_sum_value(): assert P.eval().shape == (N * M - 1, 2 * M + 2) check_value(term, x, diag, y, t) + + +def test_citations(data): + N, M, x, diag, y, t = data + + with pm.Model() as model: + term = kron.KronTerm(terms.SHOTerm(S0=1.0, w0=0.5, Q=3.0), R=np.eye(M)) + gp = GaussianProcess(term, t=x, diag=diag) + gp.marginal("obs", observed=y[:, 0].reshape((N, M))) + assert model.__citations__["celerite2:kernel"] == kron.CITATIONS diff --git a/setup.py b/setup.py index 658b8fe..9fbf1c0 100644 --- 
a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ "scipy", "celerite>=0.3.1", ], - "theano": ["theano"], + "theano": ["theano", "pymc3"], "torch": ["torch"], "jax": ["jax", "jaxlib"], "release": ["pep517", "twine"], From 7bd2ce3e0eaaefd2ea566ef32ee5d482ff895a2c Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 10:53:37 -0400 Subject: [PATCH 25/29] adding kron tutorial --- docs/index.rst | 1 + docs/tutorials/first.py | 2 +- docs/tutorials/kron.py | 130 ++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 docs/tutorials/kron.py diff --git a/docs/index.rst b/docs/index.rst index a27a727..461a8da 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -36,6 +36,7 @@ be a good choice. :caption: Tutorials tutorials/first.ipynb + tutorials/kron.ipynb .. toctree:: :maxdepth: 2 diff --git a/docs/tutorials/first.py b/docs/tutorials/first.py index 82ad05c..d186f54 100644 --- a/docs/tutorials/first.py +++ b/docs/tutorials/first.py @@ -6,7 +6,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.6.0 +# jupytext_version: 1.5.2 # kernelspec: # display_name: Python 3 # language: python diff --git a/docs/tutorials/kron.py b/docs/tutorials/kron.py new file mode 100644 index 0000000..02e909c --- /dev/null +++ b/docs/tutorials/kron.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.5.2 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# + nbsphinx="hidden" +# %matplotlib inline + +# + nbsphinx="hidden" +# %run notebook_setup +# - + +# # Multivariate models +# + +# + +import numpy as np +import matplotlib.pyplot as plt + +import celerite2 + +N = 200 +M = 5 +lam = np.linspace(0, 3, M) + +np.random.seed(59302) +t = np.append( + np.sort(np.random.uniform(0, 4, N // 2)), + np.sort(np.random.uniform(6, 10, N - N // 2)), +) +yerr = np.random.uniform(1e-1, 2e-1, (N, M)) + +rho_true = 4.5 +R_true = 0.5 * np.exp(-0.5 * (lam[:, None] - lam[None, :]) ** 2) +kernel = celerite2.kron.KronTerm( + celerite2.terms.SHOTerm(sigma=1.0, rho=rho_true, Q=3.0), R=R_true +) +gp = celerite2.GaussianProcess(kernel, t=t, yerr=yerr) +y = gp.sample() + +plt.yticks([]) +for m in range(M): + plt.axhline(2 * m, color="k", lw=0.5) +plt.plot(t, y + 2 * np.arange(M), ".") +plt.ylim(-2, 2 * M) +plt.xlim(-1, 11) +plt.xlabel("x") +_ = plt.ylabel("y (with offsets)") + +# + +import pymc3 as pm +import pymc3_ext as pmx +import celerite2.theano as cl2 + +with pm.Model() as model: + + rho = pm.Lognormal("rho", mu=np.log(5.0), sigma=5.0) + chol = pm.LKJCholeskyCov( + "chol", + eta=10.0, + n=M, + sd_dist=pm.Exponential.dist(0.01), + compute_corr=True, + )[0] + R = pm.Deterministic("R", pm.math.dot(chol, chol.T)) + + kernel = cl2.kron.KronTerm( + cl2.terms.SHOTerm(sigma=1.0, rho=rho, Q=3.0), R=R + ) + gp = cl2.GaussianProcess(kernel, t=t, yerr=yerr) + gp.marginal("obs", observed=y) + + soln = pmx.optimize() + +# + +t_pred = np.linspace(-1, 11, 1000) +with model: + mu, var = pmx.eval_in_model(gp.predict(y, t=t_pred, return_var=True), soln) + +plt.yticks([]) +for m in range(M): + plt.axhline(2 * m, color="k", lw=0.5) + plt.plot(t, y[:, m] + 2 * m, ".", color=f"C{m}") + plt.fill_between( + t_pred, + mu[:, m] - np.sqrt(var[:, m]) + 2 * m, + mu[:, m] + np.sqrt(var[:, m]) + 2 * m, + color=f"C{m}", + alpha=0.5, + ) + plt.plot(t_pred, mu[:, m] + 2 * m, color=f"C{m}") + +plt.ylim(-2, 
2 * M) +plt.xlim(-1, 11) +plt.xlabel("x") +_ = plt.ylabel("y (with offsets)") +# - + +with model: + trace = pm.sample( + tune=2000, draws=2000, target_accept=0.9, init="adapt_full" + ) + +plt.hist(trace["rho"], 50, histtype="step", color="k") +plt.axvline(rho_true) +plt.yticks([]) +plt.xlabel(r"$\rho$") +plt.ylabel(r"$p(\rho)$") + +for m in range(M): + plt.errorbar( + np.arange(M), + np.mean(trace["R"][:, m, :], axis=0) + m, + yerr=np.std(trace["R"][:, m, :], axis=0), + color=f"C{m}", + ) + plt.plot(np.arange(M), R_true[m] + m, ":", color=f"C{m}") +plt.yticks([]) +plt.xlabel("band index") +_ = plt.ylabel("covariance (with offsets)") diff --git a/setup.py b/setup.py index 9fbf1c0..ef3da7c 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "emcee", "pymc3", "tqdm", + "pymc3-ext", ], } EXTRA_REQUIRE["dev"] = ( From 40ee27a997c54f4a101a0c8be3759423fd108d46 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 11:55:39 -0400 Subject: [PATCH 26/29] dealing with testvals --- python/celerite2/theano/celerite2.py | 8 +------- python/test/theano/conftest.py | 1 + python/test/theano/test_celerite2.py | 9 ++++++--- python/test/theano/test_ops.py | 1 + 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/celerite2/theano/celerite2.py b/python/celerite2/theano/celerite2.py index aad68a0..ee84945 100644 --- a/python/celerite2/theano/celerite2.py +++ b/python/celerite2/theano/celerite2.py @@ -8,11 +8,6 @@ from . import ops from .distribution import CeleriteNormal -try: - import pymc3 as pm -except ImportError: - pm = None - CITATIONS = ( ("celerite2:foremanmackey17", "celerite2:foremanmackey18"), r""" @@ -99,8 +94,7 @@ def diagdot(self, a, b): return tt.batched_dot(a.T, b.T) def _add_citations_to_pymc3_model(self, **kwargs): - if not pm: - raise ImportError("pymc3 is required for the 'marginal' method") + import pymc3 as pm model = pm.modelcontext(kwargs.get("model", None)) if not hasattr(model, "__citations__"): diff --git a/python/test/theano/conftest.py b/python/test/theano/conftest.py index baeea58..e91bc99 100644 --- a/python/test/theano/conftest.py +++ b/python/test/theano/conftest.py @@ -7,6 +7,7 @@ def pytest_configure(config): import platform theano.config.floatX = "float64" + theano.config.compute_test_value = "raise" if platform.system() == "Darwin": theano.config.gcc.cxxflags = "-Wno-c++11-narrowing" config.addinivalue_line("filterwarnings", "ignore") diff --git a/python/test/theano/test_celerite2.py b/python/test/theano/test_celerite2.py index b9caa65..d1ae383 100644 --- a/python/test/theano/test_celerite2.py +++ b/python/test/theano/test_celerite2.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import numpy as np -import pymc3 as pm import pytest import celerite2 @@ -61,8 +60,8 @@ def test_errors(data): gp.log_likelihood(y) # Sorted - gp.compute(x[::-1], diag=diag) with pytest.raises(AssertionError): + gp.compute(x[::-1], diag=diag) gp._d.eval() # 1D @@ -74,8 +73,8 @@ def test_errors(data): gp.compute(x, diag=diag, yerr=np.sqrt(diag)) # Not positive definite - gp.compute(x, diag=-10 * diag) with pytest.raises(celerite2.backprop.LinAlgError): + gp.compute(x, diag=-10 * diag) gp._d.eval() # Not positive definite with `quiet` @@ -100,6 +99,8 @@ def test_errors(data): def test_marginal(data): + import pymc3 as pm + x, diag, y, t = data with pm.Model() as model: @@ -114,6 +115,8 @@ def test_marginal(data): def test_citations(data): + import pymc3 as pm + x, diag, y, t = data with pm.Model() as model: diff --git a/python/test/theano/test_ops.py 
b/python/test/theano/test_ops.py index 6b36587..62956c2 100644 --- a/python/test/theano/test_ops.py +++ b/python/test/theano/test_ops.py @@ -34,6 +34,7 @@ def convert_values_to_types(values): types.append(tt.dmatrix()) else: raise ValueError("unknown type") + types[-1].tag.test_value = v return types From e71fb6c1425a16487cff23520673daa33ac850cf Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 21 Oct 2020 15:39:57 -0400 Subject: [PATCH 27/29] adding docstrings for kron terms --- docs/api/python.rst | 13 +++++++++++++ python/celerite2/kron.py | 29 +++++++++++++++++++++++++++++ python/celerite2/theano/kron.py | 3 +++ 3 files changed, 45 insertions(+) diff --git a/docs/api/python.rst b/docs/api/python.rst index 81f9f74..058e6a9 100644 --- a/docs/api/python.rst +++ b/docs/api/python.rst @@ -72,3 +72,16 @@ recommended unless you're confident that you know what you're doing. .. autoclass:: celerite2.terms.RealTerm .. autoclass:: celerite2.terms.ComplexTerm .. autoclass:: celerite2.terms.OriginalCeleriteTerm + +Multivariate models +------------------- + +The original *celerite* algorithm was only defined for one dimensional inputs, +but this was generalized by `Gordon et al. (2020) +`_ to support multivariate inputs on tensor +product grids with separable kernels. In this case, the covariance matrix is +given by a Kronecker product. These models are now available in *celerite2* +using the following terms: + +.. autoclass:: celerite2.kron.KronTerm +.. autoclass:: celerite2.kron.LowRankKronTerm diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 18b35ec..1643b7e 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -8,6 +8,22 @@ class KronTerm(Term): + """A general multivariate celerite term + + This term supports rectangular data with the shape ``N, M`` where ``N`` is + the usual "time" axis or similar, but ``M`` is the dimension of the output + space. The model is that the multivariate data are correlated samples from + an underlying process descirbed by a standard *celerite* kernel. The + covariance between output dimensions is given by the general ``M x M`` + matrix ``R``. More details about this model can be found in `Gordon et al. + (2020) `_. + + Args: + term (celerite2.terms.Term): The celerite term describing the + underlying process. + R (shape[M, M]): The covariance matrix between output dimensions. + """ + __requires_general_addition__ = True @property @@ -187,6 +203,19 @@ def _copy_P(self, P0, P): class LowRankKronTerm(KronTerm): + """A low rank multivariate celerite term + + A low rank version of :class:`celerite2.kron.KronTerm` where the covariance + between outputs is ``R = alpha * alpha^T``, where ``alpha`` is a column + vector of size ``M``. More details about this model can be found in `Gordon + et al. (2020) `_. + + Args: + term (celerite2.terms.Term): The celerite term describing the + underlying process. + alpha (shape[M]): The vector of amplitudes for each output. 
+    """
+
     def __init__(self, term, *, alpha):
         self.term = term
         self.alpha = np.ascontiguousarray(
diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py
index a5c6d01..32f45e9 100644
--- a/python/celerite2/theano/kron.py
+++ b/python/celerite2/theano/kron.py
@@ -132,6 +132,8 @@
 class LowRankKronTerm(KronTerm):
+    __doc__ = base_kron.LowRankKronTerm.__doc__
+
     def __init__(self, term, *, alpha):
         self.term = term
         self.alpha = tt.as_tensor_variable(alpha)
@@ -156,6 +158,7 @@
 class KronTermSum(TermSumGeneral):
     __citations__ = CITATIONS
+    __doc__ = base_kron.KronTermSum.__doc__
 
     @property
     def dimension(self):

From f978884bd2175302c501d599bc9e4ec562101504 Mon Sep 17 00:00:00 2001
From: Dan F-M
Date: Thu, 22 Oct 2020 08:23:01 -0400
Subject: [PATCH 28/29] simplifying kron needs

---
 docs/api/python.rst             |   3 +-
 docs/tutorials/kron.py          |  11 ++++
 python/celerite2/kron.py        | 103 +++++++++-----------------
 python/celerite2/theano/kron.py |  80 ++++++++----------------
 python/test/test_kron.py        |   4 +-
 python/test/theano/test_kron.py |   4 +-
 6 files changed, 74 insertions(+), 131 deletions(-)

diff --git a/docs/api/python.rst b/docs/api/python.rst
index 058e6a9..66452e3 100644
--- a/docs/api/python.rst
+++ b/docs/api/python.rst
@@ -81,7 +81,6 @@ but this was generalized by `Gordon et al. (2020)
 <https://arxiv.org/abs/2007.05799>`_ to support multivariate inputs on tensor
 product grids with separable kernels. In this case, the covariance matrix is
 given by a Kronecker product. These models are now available in *celerite2*
-using the following terms:
+using the following:
 
 .. autoclass:: celerite2.kron.KronTerm
-.. autoclass:: celerite2.kron.LowRankKronTerm
diff --git a/docs/tutorials/kron.py b/docs/tutorials/kron.py
index 02e909c..4a50e9f 100644
--- a/docs/tutorials/kron.py
+++ b/docs/tutorials/kron.py
@@ -22,6 +22,17 @@
 # # Multivariate models
 #
+# The original *celerite* package only supported one-dimensional data (like time series), but [Gordon et al. (2020)](https://arxiv.org/abs/2007.05799) generalized the method to multivariate data on a tensor product grid.
+# This has been implemented in *celerite2* so "rectangular" data are now supported.
+# The main application discussed by [Gordon et al. (2020)](https://arxiv.org/abs/2007.05799) was multiwavelength observations of transiting exoplanets, but it is applicable to many other problems with the following structure:
+#
+# 1. The rectangular data must be fully filled: you must have observations in every band at every time. We've developed a method to handle missing data and that will be included in a future release.
+# 2. The covariance matrix must be separable with the form `k({x, y}_n, {x, y}_m) = k1(x_n, x_m) * k2(y_n, y_m)`, where `x` (a scalar) indexes the "longest" one-dimensional axis of the data (for example, time) and `y` (optionally a vector) indexes the narrower axis of the data (for example, wavelength). To apply *celerite*, we must make the further assumption that `k1(x_n, x_m)` is a standard *celerite* kernel, but no limitations are placed on the form of `k2(y_n, y_m)`.
+#
+# The implementation of this method in *celerite2* comes with two forms for the kernel:
+#
+# 1. `kron.KronTerm`: A general form of the model where `k2(y_n, y_m)` is specified as a full-rank `M x M` matrix called `R`, where `M` is the size of the `y` dimension.
The computational cost of evaluating this model scales as `O(N * J^2 * M^3)` where `N` is the size of the `x` dimension and `J` is the rank of the *celerite* term describing `k1(x_n, x_m)`. +# 2. `kron.LowRankKronTerm`: A more computationally efficient method where # + import numpy as np diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index 1643b7e..c7a055d 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] +__all__ = ["KronTerm", "KronTermSum"] import numpy as np @@ -30,21 +30,32 @@ class KronTerm(Term): def dimension(self): return self.M - def __init__(self, term, *, R): + def __init__(self, term, *, R=None, L=None): self.term = term - self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) - self.M = len(self.R) - if self.M < 1: - raise ValueError("At least one 'band' is required") - if self.R.ndim != 2 or self.R.shape != (self.M, self.M): - raise ValueError( - "R must be a square matrix; " - "use a LowRankKronTerm for the low rank model" - ) + + if R is not None: + if L is not None: + raise ValueError("Only one of 'R' and 'L' can be defined") + self.R = np.ascontiguousarray(np.atleast_2d(R), dtype=np.float64) + try: + self.L = np.linalg.cholesky(self.R) + except np.linalg.LinAlgError: + M = np.copy(self.R) + M[np.diag_indices_from(M)] += 1e-10 + self.L = np.linalg.cholesky(M) + elif L is not None: + self.L = np.ascontiguousarray(L, dtype=np.float64) + if self.L.ndim == 1: + self.L = np.reshape(self.L, (-1, 1)) + self.R = np.dot(self.L, self.L.T) + else: + raise ValueError("One of 'R' and 'L' must be defined") + + self.M, self.K = self.L.shape self.alpha2 = np.diag(self.R) def __len__(self): - return len(self.term) * self.M + return len(self.term) * self.K def __add__(self, b): if self.dimension != b.dimension: @@ -140,7 +151,7 @@ def get_celerite_matrices( x, np.zeros_like(x) ) J0 = U_sub.shape[1] - J = self._get_J(J0) + J = self.K * J0 # Allocate memory as requested if a is None: @@ -167,12 +178,13 @@ def get_celerite_matrices( a[:] = ( diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) ).flatten() - U[:] = np.kron(U_sub, self._get_U_kron()) - V[:] = np.kron(V_sub, self._get_V_kron()) + U[:] = np.kron(U_sub, self.L) + V[:] = np.kron(V_sub, self.L) c = np.concatenate((cr, cc, cc)) P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) - self._copy_P(P0, P) + if self.K > 1: + P[:] = np.tile(P0[:, :, None], (1, 1, self.K)).reshape(P.shape) return a, U, V, P @@ -188,65 +200,6 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) been implemented" ) - # The following should be implemented by subclasses - def _get_J(self, J0): - return self.M * J0 - - def _get_U_kron(self): - return self.R - - def _get_V_kron(self): - return np.eye(self.M) - - def _copy_P(self, P0, P): - P[:] = np.tile(P0[:, :, None], (1, 1, self.M)).reshape(P.shape) - - -class LowRankKronTerm(KronTerm): - """A low rank multivariate celerite term - - A low rank version of :class:`celerite2.kron.KronTerm` where the covariance - between outputs is ``R = alpha * alpha^T``, where ``alpha`` is a column - vector of size ``M``. More details about this model can be found in `Gordon - et al. (2020) `_. - - Args: - term (celerite2.terms.Term): The celerite term describing the - underlying process. - alpha (shape[M]): The vector of amplitudes for each output. 
- """ - - def __init__(self, term, *, alpha): - self.term = term - self.alpha = np.ascontiguousarray( - np.atleast_1d(alpha), dtype=np.float64 - ) - if self.alpha.ndim != 1: - raise ValueError( - "alpha must be a vector; " - "use a general KronTerm for a full rank model" - ) - self.M = len(self.alpha) - if self.M < 1: - raise ValueError("At least one 'band' is required") - self.R = np.outer(self.alpha, self.alpha) - self.alpha2 = self.alpha ** 2 - - def __len__(self): - return len(self.term) - - def _get_J(self, J0): - return J0 - - def _get_U_kron(self): - return self.alpha[:, None] - - def _get_V_kron(self): - return self.alpha[:, None] - - def _copy_P(self, P0, P): - pass - class KronTermSum(TermSumGeneral): @property diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 32f45e9..9dd22b4 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- -__all__ = ["KronTerm", "LowRankKronTerm", "KronTermSum"] +__all__ = ["KronTerm", "KronTermSum"] from theano import tensor as tt -from theano.tensor.slinalg import kron +from theano.tensor.slinalg import Cholesky, kron from .. import kron as base_kron from .terms import Term, TermSumGeneral @@ -31,6 +31,9 @@ ) +cholesky = Cholesky(lower=True) + + class KronTerm(Term): __requires_general_addition__ = True __doc__ = base_kron.KronTerm.__doc__ @@ -40,15 +43,29 @@ class KronTerm(Term): def dimension(self): return self.M - def __init__(self, term, *, R): + def __init__(self, term, *, R=None, L=None): self.term = term - self.R = tt.as_tensor_variable(R) - self.M = self.R.shape[0] - if self.R.ndim != 2: - raise ValueError( - "R must be a square matrix; " - "use a LowRankKronTerm for the low rank model" - ) + + if R is not None: + if L is not None: + raise ValueError("Only one of 'R' and 'L' can be defined") + self.R = tt.as_tensor_variable(R) + self.L = cholesky(self.R + 1e-10 * tt.eye(R.shape[0])) + self.M = self.R.shape[0] + self.K = self.R.shape[0] + + elif L is not None: + self.L = tt.as_tensor_variable(L) + self.M = self.L.shape[0] + if self.L.ndim == 1: + self.L = tt.reshape(self.L, (-1, 1)) + self.K = 1 + else: + self.K = self.L.shape[1] + self.R = tt.dot(self.L, self.L.T) + else: + raise ValueError("One of 'R' and 'L' must be defined") + self.alpha2 = tt.diag(self.R) def __add__(self, b): @@ -104,12 +121,12 @@ def get_celerite_matrices(self, x, diag): a = tt.reshape( diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) ) - U = kron(U_sub, self._get_U_kron()) - V = kron(V_sub, self._get_V_kron()) + U = kron(U_sub, self.L) + V = kron(V_sub, self.L) c = tt.concatenate((cr, cc, cc)) P0 = tt.exp(-c[None, :] * dx[:, None]) - P = self._copy_P(P0) + P = tt.tile(P0[:, :, None], (1, 1, self.K)).reshape((P0.shape[0], -1)) return a, U, V, P @@ -118,43 +135,6 @@ def get_conditional_mean_matrices(self, x, t): "Conditional mean matrices have not (yet!) 
been implemented" ) - # The following should be implemented by subclasses - def _get_U_kron(self): - return self.R - - def _get_V_kron(self): - return tt.eye(self.M) - - def _copy_P(self, P0): - return tt.tile(P0[:, :, None], (1, 1, self.M)).reshape( - (P0.shape[0], -1) - ) - - -class LowRankKronTerm(KronTerm): - __doc__ = base_kron.LowRankKronTerm.__doc__ - - def __init__(self, term, *, alpha): - self.term = term - self.alpha = tt.as_tensor_variable(alpha) - if self.alpha.ndim != 1: - raise ValueError( - "alpha must be a vector; " - "use a general KronTerm for a full rank model" - ) - self.M = self.alpha.shape[0] - self.R = self.alpha[None, :] * self.alpha[:, None] - self.alpha2 = self.alpha ** 2 - - def _get_U_kron(self): - return self.alpha[:, None] - - def _get_V_kron(self): - return self.alpha[:, None] - - def _copy_P(self, P0): - return P0 - class KronTermSum(TermSumGeneral): __citations__ = CITATIONS diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 15e6a1f..8d18722 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -125,7 +125,7 @@ def test_low_rank_value(): alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) @@ -156,7 +156,7 @@ def test_sum_value(): R = np.dot(R, R.T) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.shape == (N * M,) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 9a98d3e..8d27d3c 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -104,7 +104,7 @@ def test_low_rank_value(data): alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.eval().shape == (N * M,) @@ -130,7 +130,7 @@ def test_sum_value(data): R = np.dot(R, R.T) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) - term = kron.KronTerm(term0, R=R) + kron.LowRankKronTerm(term0, alpha=alpha) + term = kron.KronTerm(term0, R=R) + kron.KronTerm(term0, L=alpha) a, U, V, P = term.get_celerite_matrices(x, diag) assert a.eval().shape == (N * M,) From 3865fbe1cb191ddcd648dbf1e50873c2430a17d1 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Mon, 2 Nov 2020 14:58:12 -0500 Subject: [PATCH 29/29] adding initial support for missing data --- python/celerite2/celerite2.py | 93 ++++++++++++++++++++----- python/celerite2/kron.py | 44 +++++++++--- python/celerite2/terms.py | 53 ++++++++++++-- python/celerite2/theano/kron.py | 25 ++++--- python/celerite2/theano/terms.py | 19 ++++-- python/test/test_celerite2.py | 3 - python/test/test_kron.py | 114 +++++++++++++++++++------------ python/test/theano/test_kron.py | 20 ++++++ 8 files changed, 276 insertions(+), 95 deletions(-) diff --git a/python/celerite2/celerite2.py b/python/celerite2/celerite2.py index b8474de..4099ad1 100644 --- a/python/celerite2/celerite2.py +++ b/python/celerite2/celerite2.py @@ -38,6 +38,7 @@ def __init__(self, kernel, t=None, *, mean=0.0, **kwargs): self._t = None self._mean_value = None self._diag = None + self._mask = None self._size = None self._shape = None self._log_det = -np.inf @@ -72,7 +73,14 
@@ def mean_value(self): return self._mean_value def compute( - self, t, *, yerr=None, diag=None, check_sorted=True, quiet=False + self, + t, + *, + yerr=None, + diag=None, + mask=None, + check_sorted=True, + quiet=False, ): """Compute the Cholesky factorization of the GP covariance matrix @@ -117,7 +125,7 @@ def compute( self._shape = (N,) self._diag = np.empty(self._shape) - self._size = self._diag.size + self._full_size = self._diag.size if yerr is None and diag is None: self._diag[:] = 0.0 @@ -138,6 +146,10 @@ def compute( ) self._diag[:] = diag + self._mask = None + if mask is not None: + self._mask = np.ascontiguousarray(mask, dtype=bool) + # Fill the celerite matrices ( self._d, @@ -145,9 +157,18 @@ def compute( self._V, self._P, ) = self.kernel.get_celerite_matrices( - self._t, self._diag, a=self._d, U=self._U, V=self._V, P=self._P + self._t, + self._diag, + a=self._d, + U=self._U, + V=self._V, + P=self._P, + mask=self._mask, ) + # Get the correct size even with a mask + self._size = self._d.size + # Compute the Cholesky factorization try: self._d, self._W = driver.factor( @@ -183,7 +204,11 @@ def recompute(self, *, quiet=False): "You must call 'compute' directly at least once" ) return self.compute( - self._t, diag=self._diag, check_sorted=False, quiet=quiet + self._t, + diag=self._diag, + mask=self._mask, + check_sorted=False, + quiet=quiet, ) def _process_input(self, y, *, inplace=False, require_vector=False): @@ -212,9 +237,9 @@ def _process_input(self, y, *, inplace=False, require_vector=False): ) if len(self._shape) == 2: - y = np.reshape(y, (self._size,) + y.shape[2:]) + y = np.reshape(y, (self._full_size,) + y.shape[2:]) - if require_vector and (self._size,) != y.shape: + if require_vector and (self._full_size,) != y.shape: raise ValueError("'y' must be one dimensional") return y @@ -269,9 +294,16 @@ def dot_tril(self, y, *, inplace=False): ValueError: When the inputs are not valid (shape, number, etc.). 
""" y = self._process_input(y, inplace=inplace) - return self._reshape_output( - driver.dot_tril(self._U, self._P, self._d, self._W, y) - ) + + if self._mask is None: + alpha = driver.dot_tril(self._U, self._P, self._d, self._W, y) + else: + alpha = np.zeros((self._full_size,) + y.shape[1:]) + alpha[self._mask.flatten()] = driver.dot_tril( + self._U, self._P, self._d, self._W, y[self._mask.flatten()] + ) + + return self._reshape_output(alpha) def log_likelihood(self, y, *, inplace=False): """Compute the marginalized likelihood of the GP model @@ -298,9 +330,14 @@ def log_likelihood(self, y, *, inplace=False): ) if not np.isfinite(self._log_det): return -np.inf - loglike = self._norm - 0.5 * driver.norm( - self._U, self._P, self._d, self._W, y - ) + if self._mask is None: + loglike = self._norm - 0.5 * driver.norm( + self._U, self._P, self._d, self._W, y + ) + else: + loglike = self._norm - 0.5 * driver.norm( + self._U, self._P, self._d, self._W, y[self._mask.flatten()] + ) if not np.isfinite(loglike): return -np.inf return loglike @@ -349,7 +386,18 @@ def predict( y = self._process_input( y - mean_value, inplace=True, require_vector=True ) - alpha = driver.solve(self._U, self._P, self._d, self._W, np.copy(y)) + if self._mask is None: + alpha = driver.solve( + self._U, self._P, self._d, self._W, np.copy(y) + ) + else: + alpha = driver.solve( + self._U, + self._P, + self._d, + self._W, + np.copy(y[self._mask.flatten()]), + ) if t is None: xs = self._t @@ -364,7 +412,7 @@ def predict( if kernel is None: kernel = self.kernel - if t is None: + if t is None and self._mask is None: mu = self._reshape_output(y - self._diag.flatten() * alpha) if include_mean: mu += mean_value @@ -399,6 +447,8 @@ def predict( if mu is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) + if self._mask is not None: + KxsT = KxsT[self._mask.flatten(), :] mu = self._reshape_output(np.dot(KxsT.T, alpha)) if include_mean: mu += self._mean(xs) @@ -409,6 +459,8 @@ def predict( # Predictive variance. if KxsT is None: KxsT = kernel.get_value(xs[None, :] - self._t[:, None]) + if self._mask is not None: + KxsT = KxsT[self._mask.flatten(), :] if return_var: var = np.squeeze( np.diag(kernel.get_value(0.0)) @@ -451,11 +503,18 @@ def sample(self, *, size=None, include_mean=True): if self._t is None: raise RuntimeError("you must call 'compute' first") if size is None: - n = np.random.randn(*self._shape) + n = np.random.randn(self._size) else: - n = np.random.randn(*(self._shape + (size,))) + n = np.random.randn(self._size, size) + + n = driver.dot_tril(self._U, self._P, self._d, self._W, n) + + if self._mask is not None: + tmp = np.zeros((self._full_size,) + n.shape[1:]) + tmp[self._mask.flatten()] = n + n = tmp - result = self.dot_tril(n, inplace=True) + result = np.reshape(n, self._shape + n.shape[1:]) if size is not None: result = np.moveaxis(result, -1, 0) if include_mean: diff --git a/python/celerite2/kron.py b/python/celerite2/kron.py index c7a055d..0a22eab 100644 --- a/python/celerite2/kron.py +++ b/python/celerite2/kron.py @@ -4,6 +4,7 @@ import numpy as np +from . 
import driver from .terms import Term, TermSumGeneral @@ -114,7 +115,15 @@ def get_psd(self, omega): return self.term.get_psd(omega) def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -143,7 +152,14 @@ def get_celerite_matrices( N0, M = diag.shape if len(x) != N0 or M != self.M: raise ValueError("'diag' must have the shape (N, M)") - N = N0 * M + + if mask is None: + N = N0 * M + else: + mask = np.ascontiguousarray(mask, dtype=bool) + if mask.shape != (N0, M): + raise ValueError("'mask' must have the shape (N, M)") + N = mask.sum() # Compute the coefficients and kernel for the celerite subproblem ar, cr, ac, _, cc, _ = self.term.get_coefficients() @@ -171,16 +187,22 @@ def get_celerite_matrices( else: P.resize((N - 1, J), refcheck=False) - # Expand the times appropriately - x_full = np.tile(x[:, None], (1, self.M)).flatten() - dx = x_full[1:] - x_full[:-1] - - a[:] = ( - diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) - ).flatten() - U[:] = np.kron(U_sub, self.L) - V[:] = np.kron(V_sub, self.L) + if mask is None: + x_full = np.tile(x[:, None], (1, self.M)).flatten() + a[:] = ( + diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)) + ).flatten() + U[:] = np.kron(U_sub, self.L) + V[:] = np.kron(V_sub, self.L) + else: + x_full = np.tile(x[:, None], (1, self.M))[mask].flatten() + a[:] = (diag + self.alpha2[None, :] * (np.sum(ar) + np.sum(ac)))[ + mask + ].flatten() + U[:] = np.kron(U_sub, self.L)[mask.flatten()] + V[:] = np.kron(V_sub, self.L)[mask.flatten()] + dx = x_full[1:] - x_full[:-1] c = np.concatenate((cr, cc, cc)) P0 = np.exp(-c[None, :] * dx[:, None], out=P[:, :J0]) if self.K > 1: diff --git a/python/celerite2/terms.py b/python/celerite2/terms.py index eafeadb..62e86ac 100644 --- a/python/celerite2/terms.py +++ b/python/celerite2/terms.py @@ -71,7 +71,7 @@ def to_dense(self, x, diag): K[np.diag_indices_from(K)] += diag return K - def dot(self, x, diag, y): + def dot(self, x, diag, y, **kwargs): """Apply a matrix-vector or matrix-matrix product Args: @@ -80,7 +80,7 @@ def dot(self, x, diag, y): y (shape[N] or shape[N, K]): The target of vector or matrix for this operation. """ - a, U, V, P = self.get_celerite_matrices(x, diag) + a, U, V, P = self.get_celerite_matrices(x, diag, **kwargs) y = np.atleast_1d(y) if y.shape[0] != len(a): @@ -153,7 +153,15 @@ def get_psd(self, omega): return np.sqrt(2 / np.pi) * psd def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -175,6 +183,11 @@ def get_celerite_matrices( Raises: ValueError: When the inputs are not valid. 
""" + if mask is not None: + raise NotImplementedError( + "Missing data is only implemented for KronTerm models" + ) + x = np.atleast_1d(x) diag = np.atleast_1d(diag) if len(x.shape) != 1: @@ -319,7 +332,15 @@ def get_psd(self, omega): return p def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): """Get the matrices needed to solve the celerite system @@ -343,7 +364,13 @@ def get_celerite_matrices( """ x = np.atleast_1d(x) diag = np.atleast_1d(diag) - N = diag.size + + if mask is None: + N = diag.size + else: + mask = np.ascontiguousarray(mask, dtype=bool) + N = mask.sum() + sizes = [len(term) for term in self.terms] J = sum(sizes) @@ -369,6 +396,7 @@ def get_celerite_matrices( x, diag, a=a, + mask=mask, ) for dj, term in zip(sizes[1:], self.terms[1:]): ( @@ -379,6 +407,7 @@ def get_celerite_matrices( ) = term.get_celerite_matrices( x, np.zeros_like(diag), + mask=mask, ) a[:] += da j += dj @@ -524,7 +553,15 @@ def __len__(self): return len(self.term) def get_celerite_matrices( - self, x, diag, *, a=None, U=None, V=None, P=None + self, + x, + diag, + *, + a=None, + U=None, + V=None, + P=None, + mask=None, ): dt = self.delta ar, cr, a, b, c, d = self.term.get_coefficients() @@ -555,7 +592,9 @@ def get_celerite_matrices( new_diag = diag + delta_diag - return super().get_celerite_matrices(x, new_diag, a=a, U=U, V=V, P=P) + return super().get_celerite_matrices( + x, new_diag, a=a, U=U, V=V, P=P, mask=mask + ) def get_coefficients(self): ar, cr, a, b, c, d = self.term.get_coefficients() diff --git a/python/celerite2/theano/kron.py b/python/celerite2/theano/kron.py index 9dd22b4..fe6d77f 100644 --- a/python/celerite2/theano/kron.py +++ b/python/celerite2/theano/kron.py @@ -101,7 +101,7 @@ def to_dense(self, x, diag): def get_psd(self, omega): return self.term.get_psd(omega) - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, mask=None): # Check the input dimensions x = tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) @@ -114,16 +114,25 @@ def get_celerite_matrices(self, x, diag): x, tt.zeros_like(x) ) - # Expand the times appropriately - x_full = tt.reshape(tt.tile(x[:, None], (1, self.M)), (-1,)) - dx = x_full[1:] - x_full[:-1] - - a = tt.reshape( - diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)), (-1,) - ) + # Pad everything out ignoring the mask + x_full = tt.tile(x[:, None], (1, self.M)) + a = diag + self.alpha2[None, :] * (tt.sum(ar) + tt.sum(ac)) U = kron(U_sub, self.L) V = kron(V_sub, self.L) + # Apply the mask or flatten + if mask is None: + x_full = tt.reshape(x_full, (-1,)) + a = tt.reshape(a, (-1,)) + else: + mask = tt.as_tensor_variable(mask) + mask_flat = tt.reshape(mask, (-1,)) + x_full = x_full[mask] + a = a[mask] + U = U[mask_flat] + V = V[mask_flat] + + dx = x_full[1:] - x_full[:-1] c = tt.concatenate((cr, cc, cc)) P0 = tt.exp(-c[None, :] * dx[:, None]) P = tt.tile(P0[:, :, None], (1, 1, self.K)).reshape((P0.shape[0], -1)) diff --git a/python/celerite2/theano/terms.py b/python/celerite2/theano/terms.py index 90d7a8e..8c41fb4 100644 --- a/python/celerite2/theano/terms.py +++ b/python/celerite2/theano/terms.py @@ -85,7 +85,12 @@ def to_dense(self, x, diag): K += tt.diag(diag) return K - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): + if mask is not None: + raise NotImplementedError( + "Missing data is only implemented for KronTerm models" + ) + x = 
tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) ar, cr, ac, bc, cc, dc = self.coefficients @@ -132,8 +137,8 @@ def get_conditional_mean_matrices(self, x, t): return U_star, V_star, inds - def dot(self, x, diag, y): - a, U, V, P = self.get_celerite_matrices(x, diag) + def dot(self, x, diag, y, **kwargs): + a, U, V, P = self.get_celerite_matrices(x, diag, **kwargs) return ops.matmul(a, U, V, P, tt.as_tensor_variable(y))[0] @@ -195,7 +200,7 @@ def get_psd(self, omega): p += term.get_psd(omega) return p - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): x = tt.as_tensor_variable(x) diag = tt.as_tensor_variable(diag) zeros = tt.zeros_like(diag) @@ -205,7 +210,7 @@ def get_celerite_matrices(self, x, diag): V = [] P = [] for term in self.terms: - args = term.get_celerite_matrices(x, zeros) + args = term.get_celerite_matrices(x, zeros, mask=mask) a.append(args[0]) U.append(args[1]) V.append(args[2]) @@ -344,7 +349,7 @@ def __init__(self, term, delta, **kwargs): self.delta = tt.as_tensor_variable(delta).astype("float64") super().__init__(**kwargs) - def get_celerite_matrices(self, x, diag): + def get_celerite_matrices(self, x, diag, *, mask=None): dt = self.delta ar, cr, a, b, c, d = self.term.coefficients @@ -373,7 +378,7 @@ def get_celerite_matrices(self, x, diag): ) new_diag = diag + delta_diag - return super().get_celerite_matrices(x, new_diag) + return super().get_celerite_matrices(x, new_diag, mask=mask) def get_coefficients(self): ar, cr, a, b, c, d = self.term.coefficients diff --git a/python/test/test_celerite2.py b/python/test/test_celerite2.py index 4b130d9..c41f26a 100644 --- a/python/test/test_celerite2.py +++ b/python/test/test_celerite2.py @@ -73,9 +73,6 @@ def test_consistency(oterm, mean, data): dict(return_cov=False, return_var=True), dict(return_cov=True, return_var=False), ]: - print(args) - print(original_gp.predict(y, **args)) - print(gp.predict(y, **args)) assert all( np.allclose(a, b) for a, b in zip( diff --git a/python/test/test_kron.py b/python/test/test_kron.py index 8d18722..f93602c 100644 --- a/python/test/test_kron.py +++ b/python/test/test_kron.py @@ -1,14 +1,26 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest from celerite2 import GaussianProcess, kron, terms -def check_value(term, x, diag, y, t): +@pytest.fixture +def data(): + N = 100 + M = 5 + np.random.seed(105) + x = np.sort(np.random.uniform(0, 10, N)) + t = np.sort(np.random.uniform(-1, 11, 25)) + diag = np.random.uniform(0.1, 0.5, (N, M)) + y = np.random.randn(N * M, 3) + return N, M, x, diag, y, t + + +def check_value(term, x, diag, y, t, mask=None): N, M = diag.shape K = term.get_value(x[:, None] - x[None, :]) - try: K0 = term.term.get_value(x[:, None] - x[None, :]) except AttributeError: @@ -16,32 +28,48 @@ def check_value(term, x, diag, y, t): else: assert np.allclose(np.kron(K0, term.R), K) - K[np.diag_indices_from(K)] += diag.flatten() - K0 = term.dot(x, diag, np.eye(diag.size)) + mask0 = mask + if mask is not None: + K00 = K[:, mask.flatten()] + K = K00[mask.flatten(), :] + else: + K00 = np.copy(K) + mask = np.ones_like(diag, dtype=bool) + + K[np.diag_indices_from(K)] += diag.flatten()[mask.flatten()] + + K0 = term.dot(x, diag, np.eye(mask.sum()), mask=mask) assert np.allclose(K, K0) - assert np.allclose(np.dot(K, y), term.dot(x, diag, y)) + assert np.allclose( + np.dot(K, y[mask.flatten()]), + term.dot(x, diag, y[mask.flatten()], mask=mask), + ) - gp = GaussianProcess(term, t=x, diag=diag) + gp = GaussianProcess(term, t=x, 
diag=diag, mask=mask0) # "log_likelihood" method yval = y[:, 0].reshape((N, M)) - alpha = np.linalg.solve(K, y[:, 0]) + alpha = np.linalg.solve(K, y[mask.flatten(), 0]) loglike = -0.5 * ( - np.dot(y[:, 0], alpha) + np.dot(y[mask.flatten(), 0], alpha) + np.linalg.slogdet(K)[1] + len(K) * np.log(2 * np.pi) ) assert np.allclose(loglike, gp.log_likelihood(yval)) # Predict - K0 = K - np.diag(diag.flatten()) + K0 = K00 mu0 = np.dot(K0, alpha) - cov0 = K0 - np.dot(K0, np.linalg.solve(K, K0.T)) + cov0 = term.get_value(x[:, None] - x[None, :]) - np.dot( + K0, np.linalg.solve(K, K0.T) + ) mu, var = gp.predict(yval, return_var=True) _, cov = gp.predict(yval, return_cov=True) - assert np.allclose(mu, mu0.reshape((N, M))) - assert np.allclose(var, np.diag(cov0).reshape((N, M))) + assert mu.shape == (N, M) + assert var.shape == (N, M) + assert np.allclose(mu.flatten(), mu0) + assert np.allclose(var.flatten(), np.diag(cov0)) assert np.allclose(cov, cov0.reshape((N, M, N, M))) mu1, var1 = gp.predict(yval, t=x, return_var=True) @@ -50,7 +78,7 @@ def check_value(term, x, diag, y, t): assert np.allclose(var, var1) assert np.allclose(cov, cov1) - K0 = term.get_value(t[:, None] - x[None, :]) + K0 = term.get_value(t[:, None] - x[None, :])[:, mask.flatten()] mu0 = np.dot(K0, alpha) cov0 = term.get_value(t[:, None] - t[None, :]) - np.dot( K0, np.linalg.solve(K, K0.T) @@ -67,14 +95,15 @@ def check_value(term, x, diag, y, t): a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K))) np.random.seed(seed) b = gp.sample() - assert np.allclose(a.reshape((N, M)), b) + assert np.allclose(a, b[mask]) np.random.seed(seed) a = np.dot(np.linalg.cholesky(K), np.random.randn(len(K), 10)) np.random.seed(seed) b = gp.sample(size=10) assert np.allclose( - np.ascontiguousarray(np.moveaxis(a, -1, 0)).reshape((10, N, M)), b + np.moveaxis(a, -1, 0), + b[:, mask], ) # "sample_conditional" method, numerics make this one a little unstable; @@ -86,15 +115,8 @@ def check_value(term, x, diag, y, t): assert b.shape == (10, len(t), M) -def test_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_value(data): + N, M, x, diag, y, t = data R = np.random.randn(M, M) R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) @@ -113,15 +135,8 @@ def test_value(): check_value(term, x, diag, y, t) -def test_low_rank_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_low_rank_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) @@ -139,15 +154,8 @@ def test_low_rank_value(): assert np.allclose(full_term.dot(x, diag, y), term.dot(x, diag, y)) -def test_sum_value(): - N = 100 - M = 5 - - np.random.seed(105) - x = np.sort(np.random.uniform(0, 10, N)) - t = np.sort(np.random.uniform(-1, 11, 25)) - diag = np.random.uniform(0.1, 0.5, (N, M)) - y = np.random.randn(N * M, 3) +def test_sum_value(data): + N, M, x, diag, y, t = data alpha = np.random.randn(M) R = np.random.randn(M, M) @@ -165,3 +173,25 @@ def test_sum_value(): assert P.shape == (N * M - 1, 2 * M + 2) check_value(term, x, diag, y, t) + + +def test_missing_values(data): + N, M, x, diag, y, t = data + mask = np.random.rand(N, M) > 0.1 + assert np.all(mask.sum(axis=1) > 0) + + R = 
np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + a, U, V, P = term.get_celerite_matrices(x, diag, mask=mask) + assert a.shape == (mask.sum(),) + assert U.shape == (mask.sum(), 2 * M) + assert V.shape == (mask.sum(), 2 * M) + assert P.shape == (mask.sum() - 1, 2 * M) + + check_value(term, x, diag, y, t, mask=mask) diff --git a/python/test/theano/test_kron.py b/python/test/theano/test_kron.py index 8d27d3c..e42e2b1 100644 --- a/python/test/theano/test_kron.py +++ b/python/test/theano/test_kron.py @@ -149,3 +149,23 @@ def test_citations(data): gp = GaussianProcess(term, t=x, diag=diag) gp.marginal("obs", observed=y[:, 0].reshape((N, M))) assert model.__citations__["celerite2:kernel"] == kron.CITATIONS + + +def test_missing_values(data): + N, M, x, diag, y, t = data + mask = np.random.rand(N, M) > 0.1 + assert np.all(mask.sum(axis=1) > 0) + + R = np.random.randn(M, M) + R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)]) + R[np.triu_indices_from(R, 1)] = 0.0 + R = np.dot(R, R.T) + + term0 = terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3) + term = kron.KronTerm(term0, R=R) + + K = term.get_value(x[:, None] - x[None, :]).eval() + K[np.diag_indices_from(K)] += diag.flatten() + K = K[mask.flatten(), :][:, mask.flatten()] + K0 = term.dot(x, diag, np.eye(mask.sum()), mask=mask).eval() + assert np.allclose(K0.shape, K.shape)
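
For reference, the snippet below is a minimal usage sketch of the interface that the last two patches converge on, assembled from the calls exercised in python/test/test_kron.py. The SHOTerm parameters, the random R matrix, and the 10% missing-data fraction are illustrative choices for this sketch, not values taken from the patches.

import numpy as np

from celerite2 import GaussianProcess, kron, terms

np.random.seed(105)
N, M = 100, 5                               # number of times and of bands
x = np.sort(np.random.uniform(0, 10, N))    # shared, sorted time grid
diag = np.random.uniform(0.1, 0.5, (N, M))  # per-observation variances
y = np.random.randn(N, M)                   # "rectangular" data set

# Full-rank cross-band covariance: any positive-definite M x M matrix
R = np.random.randn(M, M)
R[np.diag_indices_from(R)] = np.exp(R[np.diag_indices_from(R)])
R[np.triu_indices_from(R, 1)] = 0.0
R = np.dot(R, R.T)

term = kron.KronTerm(terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3), R=R)
# ...or pass a low-rank factor instead of the full matrix:
# term = kron.KronTerm(terms.SHOTerm(sigma=1.5, rho=1.3, Q=0.3), L=np.random.randn(M))

# Boolean mask flagging which (time, band) pairs were actually observed
mask = np.random.rand(N, M) > 0.1

gp = GaussianProcess(term, t=x, diag=diag, mask=mask)
print("log likelihood:", gp.log_likelihood(y))
mu, var = gp.predict(y, return_var=True)    # both come back with shape (N, M)

Note that the masked-out entries are dropped from the Cholesky factorization, but y and diag are still passed with the full (N, M) shape, matching how _process_input and the tests handle the mask in the final patch.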