diff --git a/diffxpy/models/__init__.py b/diffxpy/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/diffxpy/models/batch_bfgs/__init__.py b/diffxpy/models/batch_bfgs/__init__.py deleted file mode 100644 index a5389bb..0000000 --- a/diffxpy/models/batch_bfgs/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -__version__ = "0.1" diff --git a/diffxpy/models/batch_bfgs/objectives.py b/diffxpy/models/batch_bfgs/objectives.py deleted file mode 100644 index ee47b1e..0000000 --- a/diffxpy/models/batch_bfgs/objectives.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -import numpy.random -import scipy.stats - - -def clip_theta_mu(theta): - return np.maximum(np.minimum(theta, np.zeros(len(theta)) + 1e8), np.zeros(len(theta)) - 1e8) - - -def clip_theta_disp(theta): - return np.maximum(np.minimum(theta, np.zeros(len(theta)) + 1e8), np.zeros(len(theta)) - 1e8) - - -def nb_glm_linker_mu(theta, X, lib_size): - return np.exp(np.asarray(np.dot(X, np.asarray(clip_theta_mu(theta)).T)).flatten() + lib_size) - - -def nb_glm_linker_disp(theta, X): - return np.asarray(np.exp(np.dot(X, np.asarray(clip_theta_disp(theta)).T))).flatten() - - -def ll_nb(x, mu, disp): - # Re-parameterize. - variance = np.maximum(mu + np.square(mu) / disp, np.zeros(len(mu)) + 1e-8) - p = 1 - (mu / variance) - return scipy.stats.nbinom(n=disp, p=1 - p).logpmf(x) - - -def objective_ll(x, theta_mu, theta_disp, design_loc, design_scale, lib_size): - return -ll_nb(x=x, mu=nb_glm_linker_mu(theta_mu, design_loc, lib_size), - disp=nb_glm_linker_disp(theta_disp, design_scale)) - - -def objective(theta, x, design_loc, design_scale, lib_size, batch_size=100): - if batch_size is None: - J = np.sum(objective_ll(x=x, - theta_mu=np.asarray(theta)[:design_loc.shape[1]], - theta_disp=np.asarray(theta)[design_loc.shape[1]:], - design_loc=design_loc, - design_scale=design_scale, - lib_size=lib_size)) - else: - batch_idx = numpy.random.randint(low=0, high=x.shape[0], size=(batch_size)) - J = np.sum(objective_ll(x=x[batch_idx], - theta_mu=np.asarray(theta)[:design_loc.shape[1]], - theta_disp=np.asarray(theta)[design_loc.shape[1]:], - design_loc=design_loc[batch_idx, :], - design_scale=design_scale[batch_idx, :], - lib_size=lib_size[batch_idx])) - return J diff --git a/diffxpy/models/batch_bfgs/optim.py b/diffxpy/models/batch_bfgs/optim.py deleted file mode 100644 index a593aed..0000000 --- a/diffxpy/models/batch_bfgs/optim.py +++ /dev/null @@ -1,231 +0,0 @@ -import logging - -from scipy.optimize import minimize -from scipy.sparse import csr_matrix -import numpy as np -from numpy.linalg import pinv -import numpy.random -from multiprocessing import Pool - -from .objectives import * - -logger = logging.getLogger(__name__) - - -class Estim_BFGS_Model(): - - def __init__(self, Estim_BFGS, nproc): - self._num_observations = Estim_BFGS.x.shape[0] - self._num_features = Estim_BFGS.x.shape[1] - self._features = Estim_BFGS.feature_names - self._observations = Estim_BFGS.x.shape[0] - self._design_loc = Estim_BFGS.design_loc - self._design_scale = Estim_BFGS.design_scale - self._loss = Estim_BFGS.full_loss(nproc) - self._log_probs = -self._loss - self._probs = np.exp(self._log_probs) - self._mles = np.transpose(Estim_BFGS.mles()) - self._gradient = np.zeros([Estim_BFGS.x.shape[1]]) - self._fisher_inv = Estim_BFGS.fisher_inv - self._idx_loc = np.arange(0, Estim_BFGS.design_loc.shape[1]) - self._idx_scale = np.arange(Estim_BFGS.design_loc.shape[1], - Estim_BFGS.design_loc.shape[1] + Estim_BFGS.design_scale.shape[1]) - self._error_codes = 
Estim_BFGS.error_codes() - self._niter = Estim_BFGS.niter() - self._estim_bfgs = Estim_BFGS - - @property - def num_observations(self) -> int: - return self._num_observations - - @property - def num_features(self) -> int: - return self._num_features - - @property - def features(self) -> np.ndarray: - return self._features - - @property - def observations(self) -> np.ndarray: - return self._observations - - @property - def design_loc(self, **kwargs): - return self._design_loc - - @property - def design_scale(self, **kwargs): - return self._design_scale - - @property - def probs(self): - return self._probs - - # @property - def log_probs(self): - return self._log_probs - - @property - def loss(self, **kwargs): - return self._loss - - @property - def gradient(self, **kwargs): - return self._gradient - - @property - def mles(self, **kwargs): - return self._mles - - @property - def par_link_loc(self, **kwargs): - return self._mles[self._idx_loc, :] - - @property - def par_link_scale(self, **kwargs): - return self._mles[self._idx_scale, :] - - def link_loc(self, x): - return np.log(x) - - @property - def fisher_inv(self): - return self._fisher_inv - - @property - def error_codes(self, **kwargs): - return self._error_codes - - -class Estim_BFGS(): - """ Class that handles multiple parallel starts of parameter estimation on one machine. - """ - - def __init__(self, X, design_loc, design_scale, lib_size=0, batch_size=None, feature_names=None): - """ Constructor of ManyGenes() - """ - if np.size(lib_size): - lib_size = np.broadcast_to(lib_size, X.shape[0]) - - if batch_size is not None: - logger.warning("Using BFGS with batching is currently not supported!") - - self.X = X - self.design_loc = np.asarray(design_loc) - self.design_scale = np.asarray(design_scale) - self.lib_size = lib_size - self.batch_size = batch_size - self.feature_names = feature_names - self.__is_sparse = isinstance(X, csr_matrix) - self.res = None - - def __init_mu_theta(self, x): - if self.design_loc.shape[1] > 1: - return np.concatenate([[np.log(np.mean(x) + 1e-08)], np.zeros([self.design_loc.shape[1] - 1])]) - # return np.zeros([self.design_loc.shape[1]]) - else: - return [np.log(np.mean(x) + 1e-08)] - - def __init_disp_theta(self, x): - if self.design_scale.shape[1] > 1: - return np.zeros([self.design_scale.shape[1]]) - else: - return [0] - - def get_gene(self, i): - """ - - Has to be public so that it can be passed via starmap. - """ - if self.__is_sparse: - return np.asarray(self.X[:, i].data.todense()) - else: - return np.asarray(self.X[:, i].data) - - def run_optim(self, x, maxiter=10000, debug=False): - """ Run single optimisation. - - Has to be public so that it can be passed via starmap. - - Parameters - ---------- - """ - x0 = np.concatenate([self.__init_mu_theta(x), self.__init_disp_theta(x)]) - - if debug: - minimize_out = minimize( - fun=objective, - x0=x0, - args=(x, self.design_loc, self.design_scale, self.lib_size, self.batch_size), - method='BFGS', - options={'maxiter': maxiter, 'gtol': 1e-05} - ) - else: - try: - minimize_out = minimize( - fun=objective, - x0=x0, - args=(x, self.design_loc, self.design_scale, self.lib_size, self.batch_size), - method='BFGS', - options={'maxiter': maxiter, 'gtol': 1e-05} - ) - err = '' - except Exception as e: - minimize_out = None - err = e - print(e) - - return minimize_out - - def run(self, nproc=1, maxiter=10000, debug=False): - """ Run multiple optimisation starts - - Parameters - ---------- - maxiter : int - Maximum number of iterations in parameter estimation. 
- nstarts : int - Number of starts to perform in multi-start optimization. - nproc : int - Number of processes to use in parallelization. - """ - if debug == True: - self.res = [] # list of FitResults instances. - for i in range(self.X.shape[1]): - self.res.append(self.run_optim(x=self.get_gene(i), maxiter=maxiter, debug=debug)) - else: - with Pool(processes=nproc) as p: - self.res = p.starmap( - self.run_optim, - [(self.get_gene(i), maxiter, False) for i in range(self.X.shape[1])] - ) - - def full_loss(self, nproc=1): - with Pool(processes=nproc) as p: - loss = np.asarray(p.starmap( - objective, - [(x.x, self.get_gene(i), self.design_loc, self.design_scale, self.lib_size, None) - for i, x in enumerate(self.res)] - )) - return np.asarray(loss) - - def mles(self): - return np.vstack([x.x for x in self.res]) - - def fim_diags(self): - return np.vstack([x.hess_inv.diagonal() for x in self.res]) - - @property - def fisher_inv(self): - return np.stack([x.hess_inv for x in self.res]) - - def niter(self): - return np.array([x.nit for x in self.res]) - - def error_codes(self): - return np.array([x.status for x in self.res]) - - def return_batchglm_formated_model(self, nproc=1): - model = Estim_BFGS_Model(self, nproc) - return (model) diff --git a/diffxpy/models/hessian.py b/diffxpy/models/hessian.py deleted file mode 100644 index c629cdf..0000000 --- a/diffxpy/models/hessian.py +++ /dev/null @@ -1,256 +0,0 @@ -import numpy as np -from scipy.special import polygamma -import numpy.linalg - - -def hes_nb_glm_mean_block( - x: np.ndarray, - mu: np.ndarray, - disp: np.ndarray, - design_loc: np.ndarray, - design_scale: np.ndarray, - i: int, - j: int -): - """ Compute entry of hessian in mean model block for a given gene. - - Sum the following across cells: - $h_{ij} = -x^{m_i}*x^{m_j}*mu*(x/disp)/(1+mu(disp))^2$ - Make sure that only element wise operations happen here! - Do not simplify design matrix elements: they are only 0 or 1 for discrete - groups but continuous if space, time, pseudotime or spline basis covariates - are supplied! - - :param x: np.ndarray (cells,) - Observations for a given gene. - :param mu: np.ndarray (cells,) - Estimated mean parameters across cells for a given gene. - :param mu: np.ndarray (cells,) - Estimated dispersion parameters across cells for a given gene. - :param design_loc: np.ndarray, matrix (cells, #parameters location model) - Design matrix of location model. - :param design_scale: np.ndarray, matrix (cells, #parameters shape model) - Design matrix of shape model. - :param i: int - Index of first dimension in fisher information matrix which is to be computed. - :param j: int - Index of second dimension in fisher information matrix which is to be computed - :return: float - Entry of fisher information matrix in mean model block at position (i,j) - """ - - h_ij = -np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * mu * x / disp / np.square(1 + mu / disp) - return np.sum(h_ij) - - -def hes_nb_glm_disp_block( - x: np.ndarray, - mu: np.ndarray, - disp: np.ndarray, - design_loc: np.ndarray, - design_scale: np.ndarray, - i: int, - j: int -): - """ Compute entry of hessian in dispersion model block for a given gene. - - Sum the following across cells: - $$ - h_{ij} = - disp * x^{m_i} * x^{m_j} * [psi_0(disp+x) - + psi_0(disp) - - mu/(disp+mu)^2 * (disp+x) - +(mu-disp) / (disp+mu) - + log(disp) - + 1 - log(disp+mu)] - + disp * psi_1(disp+x) - + disp * psi_1(disp) - $$ - - Make sure that only element wise operations happen here! 
- Do not simplify design matrix elements: they are only 0 or 1 for discrete - groups but continuous if space, time, pseudotime or spline basis covariates - are supplied! - - :param x: np.ndarray (cells,) - Observations for a given gene. - :param mu: np.ndarray (cells,) - Estimated mean parameters across cells for a given gene. - :param mu: np.ndarray (cells,) - Estimated dispersion parameters across cells for a given gene. - :param design_loc: np.ndarray, matrix (cells, #parameters location model) - Design matrix of location model. - :param design_scale: np.ndarray, matrix (cells, #parameters shape model) - Design matrix of shape model. - :param i: int - Index of first dimension in fisher information matrix which is to be computed. - :param j: int - Index of second dimension in fisher information matrix which is to be computed - - :return: float - Entry of fisher information matrix in dispersion model block at position (i,j) - """ - h_ij = ( - disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * polygamma(n=0, x=disp + x) - + polygamma(n=0, x=disp) - - mu / np.square(disp + mu) * (disp + x) - + (mu - disp) / (disp + mu) - + np.log(disp) - + 1 - np.log(disp + mu) - + disp * polygamma(n=1, x=disp + x) - + disp * polygamma(n=1, x=disp) - ) - return np.sum(h_ij) - - -def hes_nb_glm_meandisp_block( - x: np.ndarray, - mu: np.ndarray, - disp: np.ndarray, - design_loc: np.ndarray, - design_scale: np.ndarray, - i: int, - j: int -): - """ Compute entry of hessian in mean-dispersion model block for a given gene. - - Sum the following across cells: - Need to multiply by -1 here?????? - $h_{ij} = mu*x^{m_i}*x^{m_j}*(x-mu)/(1+mu/disp)^2$ - - Make sure that only element wise operations happen here! - Do not simplify design matrix elements: they are only 0 or 1 for discrete - groups but continuous if space, time, pseudotime or spline basis covariates - are supplied! - - :param x: np.ndarray (cells,) - Observations for a given gene. - :param mu: np.ndarray (cells,) - Estimated mean parameters across cells for a given gene. - :param mu: np.ndarray (cells,) - Estimated dispersion parameters across cells for a given gene. - :param design_loc: np.ndarray, matrix (cells, #parameters location model) - Design matrix of location model. - :param design_scale: np.ndarray, matrix (cells, #parameters shape model) - Design matrix of shape model. - :param i: int - Index of first dimension in fisher information matrix which is to be computed. - :param j: int - Index of second dimension in fisher information matrix which is to be computed - - :return: float - Entry of fisher information matrix in mean-dispersion model block at position (i,j) - """ - h_ij = disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * (x - mu) / np.square(1 + mu / disp) - return np.sum(h_ij) - - -def hes_nb_glm_bygene( - x: np.ndarray, - mu: np.ndarray, - disp: np.ndarray, - design_loc: np.ndarray, - design_scale: np.ndarray, -): - """ Compute hessian for a given gene. - - :param x: np.ndarray (cells,) - Observations for a given gene. - :param mu: np.ndarray (cells,) - Estimated mean parameters across cells for a given gene. - :param mu: np.ndarray (cells,) - Estimated dispersion parameters across cells for a given gene. - :param design_loc: np.ndarray, matrix (cells, #parameters location model) - Design matrix of location model. - :param design_scale: np.ndarray, matrix (cells, #parameters shape model) - Design matrix of shape model. 
-
-    :return: np.ndarray (#parameters location model + #parameters shape model, #parameters location model + #parameters shape model)
-        Fisher information matrix.
-    """
-    n_par_loc = design_loc.shape[1]
-    n_par_scale = design_scale.shape[1]
-    n_par = n_par_loc + n_par_scale
-    hes = np.zeros([n_par, n_par])
-    # Add in elements by block:
-    # Mean model block:
-    for i in np.arange(0, n_par_loc):
-        for j in np.arange(i, n_par_loc):  # Block is on the diagonal and symmetric.
-            hes[i, j] = hes_nb_glm_mean_block(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale,
-                                              i=i, j=j)
-            hes[j, i] = hes[i, j]
-    # Dispersion model block:
-    for i in np.arange(0, n_par_scale):
-        for j in np.arange(i, n_par_scale):  # Block is on the diagonal and symmetric.
-            hes[n_par_loc + i, n_par_loc + j] = hes_nb_glm_disp_block(x=x, mu=mu, disp=disp, design_loc=design_loc,
-                                                                      design_scale=design_scale, i=i, j=j)
-            hes[n_par_loc + j, n_par_loc + i] = hes[n_par_loc + i, n_par_loc + j]
-    # Mean-dispersion model block:
-    for i in np.arange(0, n_par_loc):
-        for j in np.arange(0, n_par_scale):  # Duplicate block across diagonal but block itself is not symmetric!
-            hes[i, n_par_loc + j] = hes_nb_glm_meandisp_block(x=x, mu=mu, disp=disp, design_loc=design_loc,
-                                                              design_scale=design_scale, i=i, j=j)
-            hes[n_par_loc + j, i] = hes[i, n_par_loc + j]
-    return (hes)
-
-
-def theta_covar_bygene(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-):
-    """ Compute model coefficient covariance matrix for a given gene.
-
-    Based on the hessian matrix via the fisher information matrix (fim):
-    covar = inv(fim) = inv(-hess)
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param disp: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-
-    :return: np.ndarray (#parameters location model + #parameters shape model, #parameters location model + #parameters shape model)
-        Model coefficient covariance matrix.
-    """
-    hes = hes_nb_glm_bygene(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale)
-    return numpy.linalg.pinv(-hes)
-
-
-def theta_sd_bygene(
-        x: np.ndarray,
-        mu: np.ndarray,
-        disp: np.ndarray,
-        design_loc: np.ndarray,
-        design_scale: np.ndarray,
-):
-    """ Compute model coefficient standard deviation vector for a given gene.
-
-    Based on the hessian matrix via the fisher information matrix (fim):
-    covar = inv(fim) = inv(-hess)
-    var = diagonal of covar
-
-    :param x: np.ndarray (cells,)
-        Observations for a given gene.
-    :param mu: np.ndarray (cells,)
-        Estimated mean parameters across cells for a given gene.
-    :param disp: np.ndarray (cells,)
-        Estimated dispersion parameters across cells for a given gene.
-    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
-        Design matrix of location model.
-    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
-        Design matrix of shape model.
-
-    :return: np.ndarray (#parameters location model + #parameters shape model,)
-        Model coefficient standard deviation vector.
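-
-    A hedged usage sketch (all names as defined in this module), mirroring
-    the function body below:
-
-        hes = hes_nb_glm_bygene(x=x, mu=mu, disp=disp,
-                                design_loc=design_loc, design_scale=design_scale)
-        covar = numpy.linalg.pinv(-hes)       # inverse Fisher information
-        theta_sd = np.sqrt(covar.diagonal())  # coefficient standard deviations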
- """ - - hes = hes_nb_glm_bygene(x=x, mu=mu, disp=disp, design_loc=design_loc, design_scale=design_scale) - return np.sqrt(numpy.linalg.pinv(-hes).diagonal()) diff --git a/diffxpy/stats/stats.py b/diffxpy/stats/stats.py index 3080224..74c9beb 100644 --- a/diffxpy/stats/stats.py +++ b/diffxpy/stats/stats.py @@ -263,7 +263,6 @@ def wald_test_chisq( raise ValueError('stats.wald_test(): theta_mle and theta0 have to contain the same number of entries') theta_diff = theta_mle - theta0 - # Convert to nd.array to avoid gufunc error. wald_statistic = np.array([ np.matmul( np.matmul( diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py index 8c01ce6..cc727a0 100644 --- a/diffxpy/testing/det.py +++ b/diffxpy/testing/det.py @@ -15,7 +15,6 @@ from .utils import split_x, dmat_unique from ..stats import stats from . import correction -from diffxpy import pkg_constants logger = logging.getLogger("diffxpy") @@ -822,7 +821,7 @@ def summary( :return: Summary table of differential expression test. """ res = super().summary(**kwargs) - res["grad"] = self.model_gradient.data + res["grad"] = self.model_gradient if len(self.theta_mle.shape) == 1: res["coef_mle"] = self.theta_mle if len(self.theta_sd.shape) == 1: @@ -945,7 +944,7 @@ def plot_comparison_ols_coef( import matplotlib.pyplot as plt from matplotlib import gridspec from matplotlib import rcParams - from batchglm.api.models.glm_norm import Estimator, InputDataGLM + from batchglm.api.models.tf1.glm_norm import Estimator, InputDataGLM plt.ioff() @@ -1084,7 +1083,7 @@ def plot_comparison_ols_pred( import matplotlib.pyplot as plt from matplotlib import gridspec from matplotlib import rcParams - from batchglm.api.models.glm_norm import Estimator, InputDataGLM + from batchglm.api.models import Estimator, InputDataGLM plt.ioff() @@ -1819,801 +1818,27 @@ def __init__(self, correction_type: str): - "global": correct all p-values in one operation - "by_test": correct the p-values of each test individually """ - super().__init__() - self._correction_type = correction_type - - def _correction(self, method): - if self._correction_type.lower() == "global": - pvals = np.reshape(self.pval, -1) - qvals = correction.correct(pvals=pvals, method=method) - qvals = np.reshape(qvals, self.pval.shape) - return qvals - elif self._correction_type.lower() == "by_test": - qvals = np.apply_along_axis( - func1d=lambda pv: correction.correct(pvals=pv, method=method), - axis=-1, - arr=self.pval, - ) - return qvals - - def summary(self, **kwargs) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. 
- - :return: pandas.DataFrame with the following columns: - - - gene: the gene id's - - pval: the minimum per-gene p-value of all tests - - qval: the minimum per-gene q-value of all tests - - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes - - mean: the mean expression of the gene across all groups - """ - assert self.gene_ids is not None - - # calculate maximum logFC of lower triangular fold change matrix - raw_logfc = self.log2_fold_change() - - # first flatten all dimensions up to the last 'gene' dimension - flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1]) - # next, get argmax of flattened logfc and unravel the true indices from it - r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2]) - # if logfc is maximal in the lower triangular matrix, multiply it with -1 - logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1) - - res = pd.DataFrame({ - "gene": self.gene_ids, - # return minimal pval by gene: - "pval": np.min(self.pval.reshape(-1, self.pval.shape[-1]), axis=0), - # return minimal qval by gene: - "qval": np.min(self.qval.reshape(-1, self.qval.shape[-1]), axis=0), - # return maximal logFC by gene: - "log2fc": np.asarray(logfc), - # return mean expression across all groups by gene: - "mean": np.asarray(self.mean) - }) - - return res - - -class DifferentialExpressionTestPairwise(_DifferentialExpressionTestMulti): - """ - Pairwise unit_test between more than 2 groups per gene. - """ - - def __init__(self, gene_ids, pval, logfc, ave, groups, tests, correction_type: str): - super().__init__(correction_type=correction_type) - self._gene_ids = np.asarray(gene_ids) - self._logfc = logfc - self._pval = pval - self._mean = np.asarray(ave).flatten() - self.groups = list(np.asarray(groups)) - self._tests = tests - - _ = self.qval - - @property - def gene_ids(self) -> np.ndarray: - return self._gene_ids - - @property - def x(self): - return None - - @property - def tests(self): - """ - If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests. - """ - if self._tests is None: - raise ValueError("Individual tests were not kept!") - - return self._tests - - def log_fold_change(self, base=np.e, **kwargs): - """ - Returns matrix of fold changes per gene - """ - if base == np.e: - return self._logfc - else: - return self._logfc / np.log(base) - - def _check_groups(self, group1, group2): - if group1 not in self.groups: - raise ValueError('group1 not recognized') - if group2 not in self.groups: - raise ValueError('group2 not recognized') - - def pval_pair(self, group1, group2): - """ - Get p-values of the comparison of group1 and group2. - - :param group1: Identifier of first group of observations in pair-wise comparison. - :param group2: Identifier of second group of observations in pair-wise comparison. - :return: p-values - """ - assert self._pval is not None - - self._check_groups(group1, group2) - return self._pval[self.groups.index(group1), self.groups.index(group2), :] - - def qval_pair(self, group1, group2): - """ - Get q-values of the comparison of group1 and group2. - - :param group1: Identifier of first group of observations in pair-wise comparison. - :param group2: Identifier of second group of observations in pair-wise comparison. 
- :return: q-values - """ - assert self._qval is not None - - self._check_groups(group1, group2) - return self._qval[self.groups.index(group1), self.groups.index(group2), :] - - def log10_pval_pair_clean(self, group1, group2, log10_threshold=-30): - """ - Return log10 transformed and cleaned p-values. - - NaN p-values are set to one and p-values below log10_threshold - in log10 space are set to log10_threshold. - - :param group1: Identifier of first group of observations in pair-wise comparison. - :param group2: Identifier of second group of observations in pair-wise comparison. - :param log10_threshold: minimal log10 p-value to return. - :return: Cleaned log10 transformed p-values. - """ - pvals = np.reshape(self.pval_pair(group1=group1, group2=group2), -1) - pvals = np.nextafter(0, 1, out=pvals, where=pvals == 0) - log10_pval_clean = np.log(pvals) / np.log(10) - log10_pval_clean[np.isnan(log10_pval_clean)] = 1 - log10_pval_clean = np.clip(log10_pval_clean, log10_threshold, 0, log10_pval_clean) - return log10_pval_clean - - def log10_qval_pair_clean(self, group1, group2, log10_threshold=-30): - """ - Return log10 transformed and cleaned q-values. - - NaN p-values are set to one and q-values below log10_threshold - in log10 space are set to log10_threshold. - - :param group1: Identifier of first group of observations in pair-wise comparison. - :param group2: Identifier of second group of observations in pair-wise comparison. - :param log10_threshold: minimal log10 q-value to return. - :return: Cleaned log10 transformed q-values. - """ - qvals = np.reshape(self.qval_pair(group1=group1, group2=group2), -1) - qvals = np.nextafter(0, 1, out=qvals, where=qvals == 0) - log10_qval_clean = np.log(qvals) / np.log(10) - log10_qval_clean[np.isnan(log10_qval_clean)] = 1 - log10_qval_clean = np.clip(log10_qval_clean, log10_threshold, 0, log10_qval_clean) - return log10_qval_clean - - def log_fold_change_pair( - self, - group1, - group2, - base=np.e - ): - """ - Get log fold changes of the comparison of group1 and group2. - - :param group1: Identifier of first group of observations in pair-wise comparison. - :param group2: Identifier of second group of observations in pair-wise comparison. - :param base: Base of logarithm. - :return: log fold changes - """ - assert self._logfc is not None - - self._check_groups(group1, group2) - return self.log_fold_change(base=base)[self.groups.index(group1), self.groups.index(group2), :] - - def summary( - self, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None, - **kwargs - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. - - :param qval_thres: Upper bound of corrected p-values for gene to be included. - :param fc_upper_thres: Upper bound of fold-change for gene to be included. - :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. - :param mean_thres: Lower bound of average expression for gene to be included. - :return: Summary table of differential expression test. - """ - res = super().summary(**kwargs) - - res = self._threshold_summary( - res=res, - qval_thres=qval_thres, - fc_upper_thres=fc_upper_thres, - fc_lower_thres=fc_lower_thres, - mean_thres=mean_thres - ) - - return res - - def summary_pair( - self, - group1, - group2, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. 
-
-        :param group1: Identifier of first group of observations in pair-wise comparison.
-        :param group2: Identifier of second group of observations in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: pandas.DataFrame with the following columns:
-
-            - gene: the gene id's
-            - pval: the per-gene p-value of the selected test
-            - qval: the per-gene q-value of the selected test
-            - log2fc: the per-gene log2 fold change of the selected test
-            - mean: the mean expression of the gene across all groups
-        """
-        assert self.gene_ids is not None
-
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": self.pval_pair(group1=group1, group2=group2),
-            "qval": self.qval_pair(group1=group1, group2=group2),
-            "log2fc": self.log_fold_change_pair(group1=group1, group2=group2, base=2),
-            "mean": np.asarray(self.mean)
-        })
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-
-class DifferentialExpressionTestZTest(_DifferentialExpressionTestMulti):
-    """
-    Pairwise tests between more than 2 groups per gene.
-    """
-
-    model_estim: glm.typing.EstimatorBaseTyping
-    theta_mle: np.ndarray
-    theta_sd: np.ndarray
-
-    def __init__(
-            self,
-            model_estim: glm.typing.EstimatorBaseTyping,
-            grouping,
-            groups,
-            correction_type: str
-    ):
-        super().__init__(correction_type=correction_type)
-        self.model_estim = model_estim
-        self.grouping = grouping
-        self.groups = list(np.asarray(groups))
-
-        # Values of parameter estimates: coefficients x genes array with one coefficient per group
-        self._theta_mle = model_estim.a_var
-        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
-        # Need .copy() here as nextafter needs a mutable copy.
-        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
-        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
-        self._theta_sd = np.sqrt(theta_sd)
-        self._logfc = None
-
-        # Call tests in constructor.
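-        # A hedged sketch of the statistic computed pairwise in _test() below
-        # via stats.two_coef_z_test (independence of the two normally
-        # distributed estimates is assumed):
-        #     z = (theta_mle0 - theta_mle1) / np.sqrt(theta_sd0 ** 2 + theta_sd1 ** 2)
-        #     pval = 2 * scipy.stats.norm.cdf(-np.abs(z))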
- _ = self.pval - _ = self.qval - - def _test(self, **kwargs): - groups = self.groups - num_features = self.model_estim.x.shape[1] - - pvals = np.tile(np.NaN, [len(groups), len(groups), num_features]) - pvals[np.eye(pvals.shape[0]).astype(bool)] = 1 - - theta_mle = self._theta_mle - theta_sd = self._theta_sd - - for i, g1 in enumerate(groups): - for j, g2 in enumerate(groups[(i + 1):]): - j = j + i + 1 - - pvals[i, j] = stats.two_coef_z_test(theta_mle0=theta_mle[i], theta_mle1=theta_mle[j], - theta_sd0=theta_sd[i], theta_sd1=theta_sd[j]) - pvals[j, i] = pvals[i, j] - - return pvals - - @property - def gene_ids(self) -> np.ndarray: - return np.asarray(self.model_estim.input_data.features) - - @property - def x(self): - return self.model_estim.x - - @property - def log_likelihood(self): - return self.model_estim.log_likelihood - - @property - def model_gradient(self): - return self.model_estim.jacobian - - def _ave(self): - """ - Returns the mean expression by gene - - :return: np.ndarray - """ - - return np.mean(self.model_estim.x, axis=0) - - def log_fold_change(self, base=np.e, **kwargs): - """ - Returns matrix of fold changes per gene - """ - if self._logfc is None: - groups = self.groups - num_features = self.model_estim.x.shape[1] - - logfc = np.tile(np.NaN, [len(groups), len(groups), num_features]) - logfc[np.eye(logfc.shape[0]).astype(bool)] = 0 - - theta_mle = self._theta_mle - - for i, g1 in enumerate(groups): - for j, g2 in enumerate(groups[(i + 1):]): - j = j + i + 1 - - logfc[i, j] = theta_mle[j] - theta_mle[i] - logfc[j, i] = -logfc[i, j] - - self._logfc = logfc - - if base == np.e: - return self._logfc - else: - return self._logfc / np.log(base) - - def _check_groups(self, group1, group2): - if group1 not in self.groups: - raise ValueError('group1 not recognized') - if group2 not in self.groups: - raise ValueError('group2 not recognized') - - def pval_pair(self, group1, group2): - self._check_groups(group1, group2) - return self.pval[self.groups.index(group1), self.groups.index(group2), :] - - def qval_pair(self, group1, group2): - self._check_groups(group1, group2) - return self.qval[self.groups.index(group1), self.groups.index(group2), :] - - def log_fold_change_pair(self, group1, group2, base=np.e): - self._check_groups(group1, group2) - return self.log_fold_change(base=base)[self.groups.index(group1), self.groups.index(group2), :] - - def summary( - self, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None, - **kwargs - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. - - :param qval_thres: Upper bound of corrected p-values for gene to be included. - :param fc_upper_thres: Upper bound of fold-change for gene to be included. - :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. - :param mean_thres: Lower bound of average expression for gene to be included. - :return: Summary table of differential expression test. - """ - res = super().summary(**kwargs) - - res = self._threshold_summary( - res=res, - qval_thres=qval_thres, - fc_upper_thres=fc_upper_thres, - fc_lower_thres=fc_lower_thres, - mean_thres=mean_thres - ) - - return res - - def summary_pair( - self, - group1, - group2, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. - - :param group1: Identifier of first group of observations in pair-wise comparison. 
-        :param group2: Identifier of second group of observations in pair-wise comparison.
-        :param qval_thres: Upper bound of corrected p-values for gene to be included.
-        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
-        :param fc_lower_thres: Lower bound of fold-change for gene to be included.
-        :param mean_thres: Lower bound of average expression for gene to be included.
-        :return: pandas.DataFrame with the following columns:
-
-            - gene: the gene id's
-            - pval: the per-gene p-value of the selected test
-            - qval: the per-gene q-value of the selected test
-            - log2fc: the per-gene log2 fold change of the selected test
-            - mean: the mean expression of the gene across all groups
-        """
-        assert self.gene_ids is not None
-
-        res = pd.DataFrame({
-            "gene": self.gene_ids,
-            "pval": self.pval_pair(group1=group1, group2=group2),
-            "qval": self.qval_pair(group1=group1, group2=group2),
-            "log2fc": self.log_fold_change_pair(group1=group1, group2=group2, base=2),
-            "mean": np.asarray(self.mean)
-        })
-
-        res = self._threshold_summary(
-            res=res,
-            qval_thres=qval_thres,
-            fc_upper_thres=fc_upper_thres,
-            fc_lower_thres=fc_lower_thres,
-            mean_thres=mean_thres
-        )
-
-        return res
-
-
-class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestMulti):
-    """
-    Pairwise tests between more than 2 groups per gene with lazy evaluation.
-
-    This class performs pairwise tests upon enquiry only and does not store them,
-    and is therefore suited to very large group sets for which the lfc and
-    p-value matrices of the size [genes, groups, groups] are too big to fit into
-    memory.
-    """
-
-    model_estim: glm.typing.EstimatorBaseTyping
-    _theta_mle: np.ndarray
-    _theta_sd: np.ndarray
-
-    def __init__(
-            self,
-            model_estim: glm.typing.EstimatorBaseTyping,
-            grouping, groups,
-            correction_type="global"
-    ):
-        super().__init__(correction_type=correction_type)
-        self.model_estim = model_estim
-        self.grouping = grouping
-        if isinstance(groups, list):
-            self.groups = groups
-        else:
-            self.groups = groups.tolist()
-
-        # Values of parameter estimates: coefficients x genes array with one coefficient per group
-        self._theta_mle = model_estim.a_var
-        # Standard deviation of estimates: coefficients x genes array with one coefficient per group
-        # Need .copy() here as nextafter needs a mutable copy.
-        theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy()
-        theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf))
-        self._theta_sd = np.sqrt(theta_sd)
-
-    def _correction(self, pvals, method="fdr_bh") -> np.ndarray:
-        """
-        Performs multiple testing corrections available in statsmodels.stats.multitest.multipletests().
-
-        This overrides the parent method, which relies on self.pval; self.pval is
-        not used in this lazy implementation.
-
-        :param pvals: P-value array to correct.
-        :param method: Multiple testing correction method.
-            Browse available methods in the annotation of statsmodels.stats.multitest.multipletests().
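-
-        A hedged shape sketch of the two branches implemented below
-        (correct stands in for correction.correct):
-
-            # "global":  flatten, correct once, restore the original shape
-            #     qvals = correct(pvals.ravel()).reshape(pvals.shape)
-            # "by_test": correct each test's vector of genes separately
-            #     qvals = np.apply_along_axis(correct, -1, pvals)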
- """ - if self._correction_type.lower() == "global": - pval_shape = pvals.shape - pvals = np.reshape(pvals, -1) - qvals = correction.correct(pvals=pvals, method=method) - qvals = np.reshape(qvals, pval_shape) - elif self._correction_type.lower() == "by_test": - qvals = np.apply_along_axis( - func1d=lambda p: correction.correct(pvals=p, method=method), - axis=-1, - arr=pvals, - ) - else: - raise ValueError("method " + method + " not recognized in _correction()") - - return qvals - - def _test(self, **kwargs): - """ - This function is not available in lazy results evaluation as it would - require all pairwise tests to be performed. - """ - pass - - def _test_pairs(self, groups0, groups1): - num_features = self.model_estim.x.shape[1] - pvals = np.tile(np.NaN, [len(groups0), len(groups1), num_features]) - - for i, g0 in enumerate(groups0): - for j, g1 in enumerate(groups1): - if g0 != g1: - pvals[i, j] = stats.two_coef_z_test( - theta_mle0=self._theta_mle[g0], - theta_mle1=self._theta_mle[g1], - theta_sd0=self._theta_sd[g0], - theta_sd1=self._theta_sd[g1] - ) - else: - pvals[i, j] = 1 - - return pvals - - @property - def gene_ids(self) -> np.ndarray: - return np.asarray(self.model_estim.input_data.features) - - @property - def x(self): - return self.model_estim.x - - @property - def log_likelihood(self): - return self.model_estim.log_likelihood - - @property - def model_gradient(self): - return self.model_estim.jacobian - - def _ave(self): - """ - Returns the mean expression by gene - - :return: np.ndarray - """ - - return np.mean(self.model_estim.x, axis=0) - - @property - def pval(self, **kwargs): - """ - This function is not available in lazy results evaluation as it would - require all pairwise tests to be performed. - """ - raise ValueError("This function is not available in lazy results evaluation as it would " - "require all pairwise tests to be performed.") - - @property - def qval(self, **kwargs): - """ - This function is not available in lazy results evaluation as it would - require all pairwise tests to be performed. - """ - raise ValueError("This function is not available in lazy results evaluation as it would " - "require all pairwise tests to be performed.") - - def log_fold_change(self, base=np.e, **kwargs): - """ - This function is not available in lazy results evaluation as it would - require all pairwise tests to be performed. - """ - pass - - def summary( - self, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None, - **kwargs - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. - - This function is not available in lazy results evaluation as it would - require all pairwise tests to be performed. - - :param qval_thres: Upper bound of corrected p-values for gene to be included. - :param fc_upper_thres: Upper bound of fold-change for gene to be included. - :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. - :param mean_thres: Lower bound of average expression for gene to be included. - :return: Summary table of differential expression test. 
- """ - pass - - def _check_groups(self, groups0, groups1): - if not isinstance(groups0, list): - groups0 = [groups0] - if not isinstance(groups1, list): - groups1 = [groups1] - for g in groups0: - if g not in self.groups: - raise ValueError('groups0 element '+str(g)+' not recognized') - for g in groups1: - if g not in self.groups: - raise ValueError('groups1 element '+str(g)+' not recognized') - - def _groups_idx(self, groups): - if not isinstance(groups, list): - groups = [groups] - return np.array([self.groups.index(x) for x in groups]) - - def pval_pairs(self, groups0=None, groups1=None): - """ - Return p-values for all pairwise comparisons of groups0 and groups1. - - If you want to test one group (such as a control) against all other groups - (one test for each other group), give the control group id in groups0 - and leave groups1=None, groups1 is then set to the full set of all groups. - - :param groups0: First set of groups in pair-wise comparison. - :param groups1: Second set of groups in pair-wise comparison. - :return: P-values of pair-wise comparison. - """ - if groups0 is None: - groups0 = self.groups - if groups1 is None: - groups1 = self.groups - self._check_groups(groups0, groups1) - groups0 = self._groups_idx(groups0) - groups1 = self._groups_idx(groups1) - return self._test_pairs(groups0=groups0, groups1=groups1) - - def qval_pairs(self, groups0=None, groups1=None, method="fdr_bh", **kwargs): - """ - Return multiple testing-corrected p-values for all - pairwise comparisons of groups0 and groups1. - - If you want to test one group (such as a control) against all other groups - (one test for each other group), give the control group id in groups0 - and leave groups1=None, groups1 is then set to the full set of all groups. - - :param groups0: First set of groups in pair-wise comparison. - :param groups1: Second set of groups in pair-wise comparison. - :param method: Multiple testing correction method. - Browse available methods in the annotation of statsmodels.stats.multitest.multipletests(). - :return: Multiple testing-corrected p-values of pair-wise comparison. - """ - if groups0 is None: - groups0 = self.groups - if groups1 is None: - groups1 = self.groups - self._check_groups(groups0, groups1) - groups0 = self._groups_idx(groups0) - groups1 = self._groups_idx(groups1) - pval = self.pval_pair(groups0=groups0, groups1=groups1) - return self._correction(pval=pval, method=method, **kwargs) - - def log_fold_change_pairs(self, groups0=None, groups1=None, base=np.e): - """ - Return log-fold changes for all pairwise comparisons of groups0 and groups1. - - If you want to test one group (such as a control) against all other groups - (one test for each other group), give the control group id in groups0 - and leave groups1=None, groups1 is then set to the full set of all groups. - - :param groups0: First set of groups in pair-wise comparison. - :param groups1: Second set of groups in pair-wise comparison. - :param base: Base of logarithm of log-fold change. - :return: P-values of pair-wise comparison. 
- """ - if groups0 is None: - groups0 = self.groups - if groups1 is None: - groups1 = self.groups - self._check_groups(groups0, groups1) - groups0 = self._groups_idx(groups0) - groups1 = self._groups_idx(groups1) - - num_features = self._theta_mle.shape[1] - - logfc = np.zeros(shape=(len(groups0), len(groups1), num_features)) - for i, g0 in enumerate(groups0): - for j, g1 in enumerate(groups1): - logfc[i, j, :] = self._theta_mle[g0, :] - self._theta_mle[g1, :] - - if base == np.e: - return logfc - else: - return logfc / np.log(base) - - def summary_pair( - self, - group0, - group1, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None, - **kwargs - ) -> pd.DataFrame: - """ - Summarize differential expression results of single pairwose comparison - into an output table. - - :param group0: First set of groups in pair-wise comparison. - :param group1: Second set of groups in pair-wise comparison. - :param qval_thres: Upper bound of corrected p-values for gene to be included. - :param fc_upper_thres: Upper bound of fold-change for gene to be included. - :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. - :param mean_thres: Lower bound of average expression for gene to be included. - :return: pandas.DataFrame with the following columns: - - - gene: the gene id's - - pval: the per-gene p-value of the selected test - - qval: the per-gene q-value of the selected test - - log2fc: the per-gene log2 fold change of the selected test - - mean: the mean expression of the gene across all groups - """ - assert self.gene_ids is not None - - if len(group0) != 1: - raise ValueError("group0 should only contain one entry in summary_pair()") - if len(group1) != 1: - raise ValueError("group1 should only contain one entry in summary_pair()") - - pval = self.pval_pairs(groups0=group0, groups1=group1) - qval = self._correction(pvals=pval, **kwargs) - res = pd.DataFrame({ - "gene": self.gene_ids, - "pval": pval.flatten(), - "qval": qval.flatten(), - "log2fc": self.log_fold_change_pairs(groups0=group0, groups1=group1, base=2).flatten(), - "mean": np.asarray(self.mean) - }) - - res = self._threshold_summary( - res=res, - qval_thres=qval_thres, - fc_upper_thres=fc_upper_thres, - fc_lower_thres=fc_lower_thres, - mean_thres=mean_thres - ) + _DifferentialExpressionTest.__init__(self=self) + self._correction_type = correction_type - return res + def _correction(self, method): + if self._correction_type.lower() == "global": + pvals = np.reshape(self.pval, -1) + qvals = correction.correct(pvals=pvals, method=method) + qvals = np.reshape(qvals, self.pval.shape) + return qvals + elif self._correction_type.lower() == "by_test": + qvals = np.apply_along_axis( + func1d=lambda pv: correction.correct(pvals=pv, method=method), + axis=-1, + arr=self.pval, + ) + return qvals - def summary_pairs( - self, - groups0, - groups1=None, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None, - **kwargs - ) -> pd.DataFrame: + def summary(self, **kwargs) -> pd.DataFrame: """ - Summarize differential expression results of a set of - pairwise comparisons into an output table. + Summarize differential expression results into an output table. - :param groups0: First set of groups in pair-wise comparison. - :param groups1: Second set of groups in pair-wise comparison. - :param qval_thres: Upper bound of corrected p-values for gene to be included. - :param fc_upper_thres: Upper bound of fold-change for gene to be included. 
- :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. - :param mean_thres: Lower bound of average expression for gene to be included. :return: pandas.DataFrame with the following columns: - gene: the gene id's @@ -2623,14 +1848,9 @@ def summary_pairs( - mean: the mean expression of the gene across all groups """ assert self.gene_ids is not None - if groups1 is None: - groups1 = self.groups - - pval = self.pval_pairs(groups0=groups0, groups1=groups1) - qval = self._correction(pvals=pval, **kwargs) # calculate maximum logFC of lower triangular fold change matrix - raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2) + raw_logfc = self.log_fold_change(base=2.) # first flatten all dimensions up to the last 'gene' dimension flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1]) @@ -2641,20 +1861,16 @@ def summary_pairs( res = pd.DataFrame({ "gene": self.gene_ids, - "pval": np.min(pval, axis=(0, 1)), - "qval": np.min(qval, axis=(0, 1)), + # return minimal pval by gene: + "pval": np.min(self.pval.reshape(-1, self.pval.shape[-1]), axis=0), + # return minimal qval by gene: + "qval": np.min(self.qval.reshape(-1, self.qval.shape[-1]), axis=0), + # return maximal logFC by gene: "log2fc": np.asarray(logfc), + # return mean expression across all groups by gene: "mean": np.asarray(self.mean) }) - res = self._threshold_summary( - res=res, - qval_thres=qval_thres, - fc_upper_thres=fc_upper_thres, - fc_lower_thres=fc_lower_thres, - mean_thres=mean_thres - ) - return res @@ -2854,475 +2070,3 @@ def summary(self, qval_thres=None, fc_upper_thres=None, return res - -class _DifferentialExpressionTestCont(_DifferentialExpressionTestSingle): - _de_test: _DifferentialExpressionTestSingle - _model_estim: glm.typing.EstimatorBaseTyping - _size_factors: np.ndarray - _continuous_coords: np.ndarray - _spline_coefs: list - - def __init__( - self, - de_test: _DifferentialExpressionTestSingle, - model_estim: glm.typing.EstimatorBaseTyping, - size_factors: np.ndarray, - continuous_coords: np.ndarray, - spline_coefs: list, - noise_model: str - ): - self._de_test = de_test - self._model_estim = model_estim - self._size_factors = size_factors - self._continuous_coords = continuous_coords - self._spline_coefs = spline_coefs - self.noise_model = noise_model - - @property - def gene_ids(self) -> np.ndarray: - return self._de_test.gene_ids - - @property - def x(self): - return self._de_test.x - - @property - def pval(self) -> np.ndarray: - return self._de_test.pval - - @property - def qval(self) -> np.ndarray: - return self._de_test.qval - - @property - def mean(self) -> np.ndarray: - return self._de_test.mean - - @property - def log_likelihood(self) -> np.ndarray: - return self._de_test.log_likelihood - - def summary( - self, - nonnumeric=False, - qval_thres=None, - fc_upper_thres=None, - fc_lower_thres=None, - mean_thres=None - ) -> pd.DataFrame: - """ - Summarize differential expression results into an output table. - - :param nonnumeric: Whether to include non-numeric covariates in fit. - """ - # Collect summary from differential test object. - res = self._de_test.summary() - # Overwrite fold change with fold change from temporal model. - # Note that log2_fold_change calls log_fold_change from this class - # and not from the self._de_test object, - # which is called by self._de_test.summary(). 
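-        # A hedged sketch of what that fold change amounts to for one gene,
-        # mirroring log_fold_change() below (mu_hat: fitted values along the
-        # continuous covariate):
-        #     fc = np.max(mu_hat) - np.min(mu_hat)
-        #     log2fc = np.log(np.maximum(fc, np.nextafter(0, 1))) / np.log(2)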
- res['log2fc'] = self.log2_fold_change() - - res = self._threshold_summary( - res=res, - qval_thres=qval_thres, - fc_upper_thres=fc_upper_thres, - fc_lower_thres=fc_lower_thres, - mean_thres=mean_thres - ) - - return res - - def log_fold_change(self, base=np.e, genes=None, nonnumeric=False): - """ - Return log_fold_change based on fitted expression values by gene. - - The log_fold_change is defined as the log of the fold change - from the minimal to the maximal fitted value by gene. - - :param base: Basis of logarithm. - :param genes: Genes for which to return maximum fitted value. Defaults - to all genes if None. - :param nonnumeric: Whether to include non-numeric covariates in fit. - :return: Log-fold change of fitted expression value by gene. - """ - if genes is None: - genes = np.asarray(range(self.x.shape[1])) - else: - genes = self._idx_genes(genes) - - fc = self.max(genes=genes, non_numeric=nonnumeric) - self.min(genes=genes, non_numeric=nonnumeric) - fc = np.nextafter(0, 1, out=fc, where=fc == 0) - - return np.log(fc) / np.log(base) - - def _filter_genes_str(self, genes: list): - """ - Filter genes indexed by ID strings by list of genes given in data set. - - :param genes: List of genes to filter. - :return: Filtered list of genes - """ - genes_found = np.array([x in self.gene_ids for x in genes]) - if any(np.logical_not(genes_found)): - logger.info("did not find some genes, omitting") - genes = genes[genes_found] - return genes - - def _filter_genes_int(self, genes: list): - """ - Filter genes indexed by integers by gene list length. - - :param genes: List of genes to filter. - :return: Filtered list of genes - """ - genes_found = np.array([x < self.x.shape[1] for x in genes]) - if any(genes_found == False): - logger.info("did not find some genes, omitting") - genes = genes[genes_found] - return genes - - def _idx_genes(self, genes): - if not isinstance(genes, list): - if isinstance(genes, np.ndarray): - genes = genes.tolist() - else: - genes = [genes] - - if isinstance(genes[0], str): - genes = self._filter_genes_str(genes) - genes = np.array([self.gene_ids.tolist().index(x) for x in genes]) - elif isinstance(genes[0], int) or isinstance(genes[0], np.int64): - genes = self._filter_genes_int(genes) - else: - raise ValueError("only string and integer elements allowed in genes") - return genes - - def _spline_par_loc_idx(self, intercept=True): - """ - Get indices of spline basis model parameters in - entire location parameter model parameter set. - - :param intercept: Whether to include intercept. - :return: Indices of spline basis parameters of location model. - """ - par_loc_names = self._model_estim.design_loc.coords['design_loc_params'].values.tolist() - idx = [par_loc_names.index(x) for x in self._spline_coefs] - if 'Intercept' in par_loc_names and intercept == True: - idx = np.concatenate([np.where([[x == 'Intercept' for x in par_loc_names]])[0], idx]) - return idx - - def _continuous_model(self, idx, non_numeric=False): - """ - Recover continuous fit for a gene. - - :param idx: Index of genes to recover fit for. - :param non_numeric: Whether to include non-numeric covariates in fit. - :return: Continuuos fit for each cell for given gene. 
- """ - idx = np.asarray(idx) - if non_numeric: - mu = np.matmul(self._model_estim.input_data.design_loc, - self._model_estim.par_link_loc[:, idx]) - if self._size_factors is not None: - mu = mu + self._size_factors - else: - idx_basis = self._spline_par_loc_idx(intercept=True) - mu = np.matmul(self._model_estim.input_data.design_loc[:, idx_basis], - self._model_estim.par_link_loc[idx_basis, idx]) - - mu = np.exp(mu) - return mu - - def max(self, genes, non_numeric=False): - """ - Return maximum fitted expression value by gene. - - :param genes: Genes for which to return maximum fitted value. - :param non_numeric: Whether to include non-numeric covariates in fit. - :return: Maximum fitted expression value by gene. - """ - genes = self._idx_genes(genes) - return np.array([np.max(self._continuous_model(idx=i, non_numeric=non_numeric)) - for i in genes]) - - def min(self, genes, non_numeric=False): - """ - Return minimum fitted expression value by gene. - - :param genes: Genes for which to return maximum fitted value. - :param non_numeric: Whether to include non-numeric covariates in fit. - :return: Maximum fitted expression value by gene. - """ - genes = self._idx_genes(genes) - return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric)) - for i in genes]) - - def argmax(self, genes, non_numeric=False): - """ - Return maximum fitted expression value by gene. - - :param genes: Genes for which to return maximum fitted value. - :param non_numeric: Whether to include non-numeric covariates in fit. - :return: Maximum fitted expression value by gene. - """ - genes = self._idx_genes(genes) - idx = np.array([np.argmax(self._continuous_model(idx=i, non_numeric=non_numeric)) - for i in genes]) - return self._continuous_coords[idx] - - def argmin(self, genes, non_numeric=False): - """ - Return minimum fitted expression value by gene. - - :param genes: Genes for which to return maximum fitted value. - :param non_numeric: Whether to include non-numeric covariates in fit. - :return: Maximum fitted expression value by gene. - """ - genes = self._idx_genes(genes) - idx = np.array([np.argmin(self._continuous_model(idx=i, non_numeric=non_numeric)) - for i in genes]) - return self._continuous_coords[idx] - - def plot_genes( - self, - genes, - hue=None, - size=1, - log=True, - non_numeric=False, - save=None, - show=True, - ncols=2, - row_gap=0.3, - col_gap=0.25, - return_axs=False - ): - """ - Plot observed data and spline fits of selected genes. - - :param genes: Gene IDs to plot. - :param hue: Confounder to include in plot. - :param size: Point size. - :param log: Whether to log values. - :param non_numeric: - :param save: Path+file name stem to save plots to. - File will be save+"_genes.png". Does not save if save is None. - :param show: Whether to display plot. - :param ncols: Number of columns in plot grid if multiple genes are plotted. - :param row_gap: Vertical gap between panel rows relative to panel height. - :param col_gap: Horizontal gap between panel columns relative to panel width. - :param return_axs: Whether to return axis objects of plots. - :return: Matplotlib axis objects. - """ - - import seaborn as sns - import matplotlib.pyplot as plt - from matplotlib import gridspec - from matplotlib import rcParams - - plt.ioff() - - gene_idx = self._idx_genes(genes) - - # Set up gridspec. 
- ncols = ncols if len(gene_idx) > ncols else len(gene_idx) - nrows = len(gene_idx) // ncols + (len(gene_idx) - (len(gene_idx) // ncols) * ncols) - gs = gridspec.GridSpec( - nrows=nrows, - ncols=ncols, - hspace=row_gap, - wspace=col_gap - ) - - # Define figure size based on panel number and grid. - fig = plt.figure( - figsize=( - ncols * rcParams['figure.figsize'][0], # width in inches - nrows * rcParams['figure.figsize'][1] * (1 + row_gap) # height in inches - ) - ) - - # Build axis objects in loop. - axs = [] - for i, g in enumerate(gene_idx): - ax = plt.subplot(gs[i]) - axs.append(ax) - - y = self.x[:, g] - yhat = self._continuous_model(idx=g, non_numeric=non_numeric) - if log: - y = np.log(y + 1) - yhat = np.log(yhat + 1) - - sns.scatterplot( - x=self._continuous_coords, - y=y, - hue=hue, - size=size, - ax=ax, - legend=False - ) - sns.lineplot( - x=self._continuous_coords, - y=yhat, - hue=hue, - ax=ax - ) - - ax.set_title(genes[i]) - ax.set_xlabel("continuous") - if log: - ax.set_ylabel("log expression") - else: - ax.set_ylabel("expression") - - # Save, show and return figure. - if save is not None: - plt.savefig(save+'_genes.png') - - if show: - plt.show() - - plt.close(fig) - - if return_axs: - return axs - else: - return - - def plot_heatmap( - self, - genes: Union[list, np.ndarray], - save=None, - show=True, - transform: str = "zscore", - nticks=10, - cmap: str = "YlGnBu", - width=10, - height_per_gene=0.5, - return_axs=False - ): - """ - Plot observed data and spline fits of selected genes. - - :param genes: Gene IDs to plot. - :param save: Path+file name stem to save plots to. - File will be save+"_genes.png". Does not save if save is None. - :param show: Whether to display plot. - :param transform: Gene-wise transform to use. - :param nticks: Number of x ticks. - :param cmap: matplotlib cmap. - :param width: Width of heatmap figure. - :param height_per_gene: Height of each row (gene) in heatmap figure. - :param return_axs: Whether to return axis objects of plots. - :return: Matplotlib axis objects. - """ - import seaborn as sns - import matplotlib.pyplot as plt - - plt.ioff() - - gene_idx = self._idx_genes(genes) - - # Define figure. - fig = plt.figure(figsize=(width, height_per_gene * len(gene_idx))) - ax = fig.add_subplot(111) - - # Build heatmap matrix. - # Add in data. - data = np.array([ - self._continuous_model(idx=g, non_numeric=False) - for i, g in enumerate(gene_idx) - ]) - # Order columns by continuous covariate. - idx_x_sorted = np.argsort(self._continuous_coords) - data = data[:, idx_x_sorted] - xcoord = self._continuous_coords[idx_x_sorted] - - if transform.lower() == "log10": - data = np.nextafter(0, 1, out=data, where=data == 0) - data = np.log(data) / np.log(10) - elif transform.lower() == "zscore": - mu = np.mean(data, axis=0) - sd = np.std(data, axis=0) - sd = np.nextafter(0, 1, out=sd, where=sd == 0) - data = np.array([(x - mu[i]) / sd[i] for i, x in enumerate(data)]) - elif transform.lower() == "none": - pass - else: - raise ValueError("transform not recognized in plot_heatmap()") - - # Create heatmap. - sns.heatmap(data=data, cmap=cmap, ax=ax) - - # Set up axis labels. 
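-        # A hedged note: xcoord is sorted above, so the nearest-neighbour
-        # lookup below reduces to xcoord[i]; equivalently:
-        #     xtick_lab = [str(np.round(xcoord[i], 2)) for i in xtick_pos]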
- xtick_pos = np.asarray(np.round(np.linspace( - start=0, - stop=data.shape[1] - 1, - num=nticks, - endpoint=True - )), dtype=int) - xtick_lab = [str(np.round(xcoord[np.argmin(np.abs(xcoord - xcoord[i]))], 2)) - for i in xtick_pos] - ax.set_xticks(xtick_pos) - ax.set_xticklabels(xtick_lab) - ax.set_xlabel("continuous") - plt.yticks(np.arange(len(genes)), genes, rotation='horizontal') - ax.set_ylabel("genes") - - # Save, show and return figure. - if save is not None: - plt.savefig(save + '_genes.png') - - if show: - plt.show() - - plt.close(fig) - - if return_axs: - return ax - else: - return - - -class DifferentialExpressionTestWaldCont(_DifferentialExpressionTestCont): - de_test: DifferentialExpressionTestWald - - def __init__( - self, - de_test: DifferentialExpressionTestWald, - size_factors: np.ndarray, - continuous_coords: np.ndarray, - spline_coefs: list, - noise_model: str, - ): - super().__init__( - de_test=de_test, - model_estim=de_test.model_estim, - size_factors=size_factors, - continuous_coords=continuous_coords, - spline_coefs=spline_coefs, - noise_model=noise_model - ) - - -class DifferentialExpressionTestLRTCont(_DifferentialExpressionTestCont): - de_test: DifferentialExpressionTestLRT - - def __init__( - self, - de_test: DifferentialExpressionTestLRT, - size_factors: np.ndarray, - continuous_coords: np.ndarray, - spline_coefs: list, - noise_model: str - ): - super().__init__( - de_test=de_test, - model_estim=de_test.full_estim, - size_factors=size_factors, - continuous_coords=continuous_coords, - spline_coefs=spline_coefs, - noise_model=noise_model - ) diff --git a/diffxpy/testing/det_cont.py b/diffxpy/testing/det_cont.py new file mode 100644 index 0000000..ca6d221 --- /dev/null +++ b/diffxpy/testing/det_cont.py @@ -0,0 +1,551 @@ +import abc +try: + import anndata +except ImportError: + anndata = None +import batchglm.api as glm +import logging +import numpy as np +import pandas as pd +import scipy +import scipy.sparse +from typing import Union + +from .det import _DifferentialExpressionTestSingle, DifferentialExpressionTestWald, DifferentialExpressionTestLRT + +logger = logging.getLogger("diffxpy") + + +class _DifferentialExpressionTestCont(_DifferentialExpressionTestSingle): + _de_test: _DifferentialExpressionTestSingle + _model_estim: glm.typing.EstimatorBaseTyping + _size_factors: np.ndarray + _continuous_coords: np.ndarray + _spline_coefs: list + + def __init__( + self, + de_test: _DifferentialExpressionTestSingle, + model_estim: glm.typing.EstimatorBaseTyping, + size_factors: np.ndarray, + continuous_coords: np.ndarray, + spline_coefs: list, + interpolated_spline_basis: np.ndarray, + noise_model: str + ): + self._de_test = de_test + self._model_estim = model_estim + self._size_factors = size_factors + self._continuous_coords = continuous_coords + self._spline_coefs = spline_coefs + self._interpolated_spline_basis = interpolated_spline_basis + self.noise_model = noise_model + + @property + def gene_ids(self) -> np.ndarray: + return self._de_test.gene_ids + + @property + def x(self): + return self._de_test.x + + @property + def pval(self) -> np.ndarray: + return self._de_test.pval + + @property + def qval(self) -> np.ndarray: + return self._de_test.qval + + @property + def mean(self) -> np.ndarray: + return self._de_test.mean + + @property + def log_likelihood(self) -> np.ndarray: + return self._de_test.log_likelihood + + def summary( + self, + non_numeric=False, + qval_thres=None, + fc_upper_thres=None, + fc_lower_thres=None, + mean_thres=None + ) -> pd.DataFrame: + """ 
+        Summarize differential expression results into an output table.
+
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        """
+        # Collect summary from differential test object.
+        res = self._de_test.summary()
+        # Overwrite fold change with fold change from temporal model.
+        # Note that log2_fold_change calls log_fold_change from this class
+        # and not from the self._de_test object,
+        # which is called by self._de_test.summary().
+        res['log2fc'] = self.log2_fold_change()
+
+        res = self._threshold_summary(
+            res=res,
+            qval_thres=qval_thres,
+            fc_upper_thres=fc_upper_thres,
+            fc_lower_thres=fc_lower_thres,
+            mean_thres=mean_thres
+        )
+
+        return res
+
+    def log_fold_change(self, base=np.e, genes=None, non_numeric=False):
+        """
+        Return log_fold_change based on fitted expression values by gene.
+
+        The log_fold_change is defined as the log of the fold change
+        from the minimal to the maximal fitted value by gene.
+
+        :param base: Base of logarithm.
+        :param genes: Genes for which to return the log-fold change. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        if genes is None:
+            idx = np.arange(0, self.x.shape[1])
+        else:
+            idx, genes = self._idx_genes(genes)
+
+        max_val = self.max(genes=idx, non_numeric=non_numeric)
+        min_val = self.min(genes=idx, non_numeric=non_numeric)
+        max_val = np.nextafter(0, 1, out=max_val, where=max_val == 0)
+        min_val = np.nextafter(0, 1, out=min_val, where=min_val == 0)
+        return (np.log(max_val) - np.log(min_val)) / np.log(base)
+
+    def log2_fold_change(self, genes=None, non_numeric=False):
+        """
+        Calculates the log_2 fold change(s) for this DifferentialExpressionTest.
+
+        :param genes: Genes for which to return the log-fold change. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        return self.log_fold_change(base=2, genes=genes, non_numeric=non_numeric)
+
+    def log10_fold_change(self, genes=None, non_numeric=False):
+        """
+        Calculates the log_10 fold change(s) for this DifferentialExpressionTest.
+
+        :param genes: Genes for which to return the log-fold change. Defaults
+            to all genes if None.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Log-fold change of fitted expression value by gene.
+        """
+        return self.log_fold_change(base=10, genes=genes, non_numeric=non_numeric)
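
For intuition on the definition used above: the fold change reported here is the ratio between the peak and the trough of a gene's fitted expression curve. A quick numeric sketch (made-up values, for illustration only):

    import numpy as np

    # Hypothetical fitted expression values of one gene along the continuous covariate.
    fit = np.array([0.5, 1.0, 2.0, 4.0])

    # Log-fold change from the minimal to the maximal fitted value, here in base 2.
    lfc_log2 = (np.log(fit.max()) - np.log(fit.min())) / np.log(2)
    # lfc_log2 == 3.0, i.e. an 8-fold change from trough to peak.
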
+    def _filter_genes_str(self, genes: list):
+        """
+        Filter genes indexed by ID strings by list of genes given in data set.
+
+        :param genes: List of genes to filter.
+        :return: Filtered list of genes.
+        """
+        genes_found = np.array([x in self.gene_ids for x in genes])
+        if any(np.logical_not(genes_found)):
+            logger.info("did not find some genes, omitting")
+            # Boolean masking requires an array, not a list.
+            genes = np.asarray(genes)[genes_found]
+        return genes
+
+    def _filter_genes_int(self, genes: list):
+        """
+        Filter genes indexed by integers by gene list length.
+
+        :param genes: List of genes to filter.
+        :return: Filtered list of genes.
+        """
+        genes_found = np.array([x < self.x.shape[1] for x in genes])
+        if any(np.logical_not(genes_found)):
+            logger.info("did not find some genes, omitting")
+            # Boolean masking requires an array, not a list.
+            genes = np.asarray(genes)[genes_found]
+        return genes
+
+    def _idx_genes(self, genes):
+        if not isinstance(genes, list):
+            if isinstance(genes, np.ndarray):
+                genes = genes.tolist()
+            elif isinstance(genes, pd.Index):
+                genes = genes.tolist()
+            else:
+                genes = [genes]
+
+        if isinstance(genes[0], str):
+            genes = self._filter_genes_str(genes)
+            idx = np.array([self.gene_ids.tolist().index(x) for x in genes])
+        elif isinstance(genes[0], int) or isinstance(genes[0], np.number):
+            genes = self._filter_genes_int(genes)
+            idx = genes
+            genes = [self.gene_ids.tolist()[x] for x in idx]
+        else:
+            raise ValueError("only string and integer elements allowed in genes")
+        return idx, genes
+
+    def _spline_par_loc_idx(self, intercept=True):
+        """
+        Get indices of spline basis model parameters in
+        entire location parameter model parameter set.
+
+        :param intercept: Whether to include intercept.
+        :return: Indices of spline basis parameters of location model.
+        """
+        par_loc_names = self._model_estim.input_data.loc_names
+        idx = [par_loc_names.index(x) for x in self._spline_coefs]
+        if 'Intercept' in par_loc_names and intercept:
+            idx = np.concatenate([np.where([[x == 'Intercept' for x in par_loc_names]])[0], idx])
+        return idx
+
+    def _continuous_model(self, idx, non_numeric=False):
+        """
+        Recover continuous fit for a gene.
+
+        :param idx: Index of genes to recover fit for.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Continuous fit for each cell for given gene.
+        """
+        idx = np.asarray(idx)
+
+        if non_numeric:
+            mu = np.matmul(self._model_estim.input_data.design_loc,
+                           self._model_estim.model.a[:, idx])
+            if self._size_factors is not None:
+                mu = mu + self._model_estim.input_data.size_factors
+        else:
+            idx_basis = self._spline_par_loc_idx(intercept=True)
+            mu = np.matmul(self._model_estim.input_data.design_loc[:, idx_basis],
+                           self._model_estim.model.a[idx_basis, idx])
+
+        mu = np.exp(mu)
+        return mu
+
+    def _continuous_interpolation(self, idx):
+        """
+        Recover continuous fit for a gene.
+
+        :param idx: Index of genes to recover fit for.
+        :return: Continuous fit for each cell for given gene.
+        """
+        idx = np.asarray(idx)
+        idx_basis = self._spline_par_loc_idx(intercept=True)
+        eta_loc = np.matmul(self._interpolated_spline_basis[:, :-1], self._model_estim.model.a[idx_basis, :][:, idx])
+        mu = np.exp(eta_loc)
+        t_eval = self._interpolated_spline_basis[:, -1]
+        return t_eval, mu
+
+    def max(self, genes, non_numeric=False):
+        """
+        Return maximum fitted expression value by gene.
+
+        :param genes: Genes for which to return maximum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Maximum fitted expression value by gene.
+        """
+        idx, genes = self._idx_genes(genes)
+        return np.array([np.max(self._continuous_model(idx=i, non_numeric=non_numeric))
+                         for i in idx])
+
+    def min(self, genes, non_numeric=False):
+        """
+        Return minimum fitted expression value by gene.
+
+        :param genes: Genes for which to return minimum fitted value.
+        :param non_numeric: Whether to include non-numeric covariates in fit.
+        :return: Minimum fitted expression value by gene.
+        """
+        idx, genes = self._idx_genes(genes)
+        return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric))
+                         for i in idx])
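
The fit recovery in `_continuous_model` above reduces to a matrix product of the spline columns of the location design matrix with the corresponding location coefficients, followed by the inverse link. A minimal sketch with synthetic shapes (all names below are placeholders, not the estimator's API):

    import numpy as np

    rng = np.random.default_rng(0)
    n_obs, n_basis = 100, 5

    # Placeholder spline basis columns (including intercept) and the location
    # coefficients of one gene.
    design_spline = rng.uniform(size=(n_obs, n_basis))
    a_gene = rng.normal(size=n_basis)

    # Linear predictor on the link scale, then inverse link (exp for the log link).
    mu = np.exp(design_spline @ a_gene)

    # max(), min(), argmax() etc. above are simple reductions over this curve.
    peak, trough = mu.max(), mu.min()
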
+ """ + idx, genes = self._idx_genes(genes) + return np.array([np.min(self._continuous_model(idx=i, non_numeric=non_numeric)) + for i in idx]) + + def argmax(self, genes, non_numeric=False): + """ + Return maximum fitted expression value by gene. + + :param genes: Genes for which to return maximum fitted value. + :param non_numeric: Whether to include non-numeric covariates in fit. + :return: Maximum fitted expression value by gene. + """ + idx, genes = self._idx_genes(genes) + idx = np.array([np.argmax(self._continuous_model(idx=i, non_numeric=non_numeric)) + for i in idx]) + return self._continuous_coords[idx] + + def argmin(self, genes, non_numeric=False): + """ + Return minimum fitted expression value by gene. + + :param genes: Genes for which to return maximum fitted value. + :param non_numeric: Whether to include non-numeric covariates in fit. + :return: Maximum fitted expression value by gene. + """ + idx, genes = self._idx_genes(genes) + idx = np.array([np.argmin(self._continuous_model(idx=i, non_numeric=non_numeric)) + for i in idx]) + return self._continuous_coords[idx] + + def plot_genes( + self, + genes, + hue=None, + scalings=None, + size=1, + log=True, + save=None, + show=True, + ncols=2, + row_gap=0.3, + col_gap=0.25, + return_axs=False + ): + """ + Plot observed data and spline fits of selected genes. + + :param genes: Gene IDs to plot. + :param hue: Confounder to include in plot. Must be length number of observations. + :param scalings: Names of scaling coefficients to plot separate model curves for. + :param size: Point size. + :param log: Whether to log values. + :param save: Path+file name stem to save plots to. + File will be save+"_genes.png". Does not save if save is None. + :param show: Whether to display plot. + :param ncols: Number of columns in plot grid if multiple genes are plotted. + :param row_gap: Vertical gap between panel rows relative to panel height. + :param col_gap: Horizontal gap between panel columns relative to panel width. + :param return_axs: Whether to return axis objects of plots. + :return: Matplotlib axis objects. + """ + + import seaborn as sns + import matplotlib.pyplot as plt + from matplotlib import gridspec + from matplotlib import rcParams + + plt.ioff() + + gene_idx, gene_ids = self._idx_genes(genes) + + # Set up gridspec. + ncols = ncols if len(gene_idx) > ncols else len(gene_idx) + nrows = len(gene_idx) // ncols + (len(gene_idx) - (len(gene_idx) // ncols) * ncols) + gs = gridspec.GridSpec( + nrows=nrows, + ncols=ncols, + hspace=row_gap, + wspace=col_gap + ) + + # Define figure size based on panel number and grid. + fig = plt.figure( + figsize=( + ncols * rcParams['figure.figsize'][0], # width in inches + nrows * rcParams['figure.figsize'][1] * (1 + row_gap) # height in inches + ) + ) + + # Build axis objects in loop. 
+ axs = [] + for i, g in enumerate(gene_idx): + ax = plt.subplot(gs[i]) + axs.append(ax) + + y = self.x[:, g] + if isinstance(y, scipy.sparse.csr_matrix): + y = np.asarray(y.todense()).flatten() + if self._model_estim.input_data.size_factors is not None: + y = y / self._model_estim.input_data.size_factors + t_continuous, yhat = self._continuous_interpolation(idx=g) + if scalings is not None: + yhat = np.vstack([ + [yhat], + [ + yhat * np.expand_dims( + np.exp(self._model_estim.a_var[self._model_estim.input_data.loc_names.index(x), g]), + axis=0 + ) + for i, x in enumerate(scalings) + ] + ]) + else: + yhat = np.expand_dims(yhat, axis=0) + if log: + y = np.log(y + 1) + yhat = np.log(yhat + 1) + + sns.scatterplot( + x=self._continuous_coords, + y=y, + hue=hue, + size=size, + ax=ax, + legend=False + ) + for j in range(yhat.shape[0]): + sns.lineplot( + x=t_continuous, + y=yhat[j, :], + hue=None, + ax=ax + ) + + ax.set_title(gene_ids[i]) + ax.set_xlabel("continuous") + if log: + ax.set_ylabel("log expression") + else: + ax.set_ylabel("expression") + + # Save, show and return figure. + if save is not None: + plt.savefig(save+'_genes.png') + + if show: + plt.show() + + plt.close(fig) + plt.ion() + + if return_axs: + return axs + else: + return + + def plot_heatmap( + self, + genes: Union[list, np.ndarray], + save=None, + show=True, + transform: str = "zscore", + nticks=10, + cmap: str = "YlGnBu", + width=10, + height_per_gene=0.5, + return_axs=False + ): + """ + Plot observed data and spline fits of selected genes. + + :param genes: Gene IDs to plot. + :param save: Path+file name stem to save plots to. + File will be save+"_genes.png". Does not save if save is None. + :param show: Whether to display plot. + :param transform: Gene-wise transform to use. + :param nticks: Number of x ticks. + :param cmap: matplotlib cmap. + :param width: Width of heatmap figure. + :param height_per_gene: Height of each row (gene) in heatmap figure. + :param return_axs: Whether to return axis objects of plots. + :return: Matplotlib axis objects. + """ + import seaborn as sns + import matplotlib.pyplot as plt + + plt.ioff() + + gene_idx, gene_ids = self._idx_genes(genes) + + # Define figure. + fig = plt.figure(figsize=(width, height_per_gene * len(gene_idx))) + ax = fig.add_subplot(111) + + # Build heatmap matrix. + # Add in data. + xcoord, data = self._continuous_interpolation(idx=gene_idx) + data = data.T + + if transform.lower() == "log10": + data = np.nextafter(0, 1, out=data, where=data == 0) + data = np.log(data) / np.log(10) + elif transform.lower() == "zscore": + mu = np.mean(data, axis=0, keepdims=True) + sd = np.std(data, axis=0, keepdims=True) + sd = np.nextafter(0, 1, out=sd, where=sd == 0) + data = (data - mu) / sd + elif transform.lower() == "none": + pass + else: + raise ValueError("transform not recognized in plot_heatmap()") + + # Create heatmap. + sns.heatmap(data=data, cmap=cmap, ax=ax) + + # Set up axis labels. + xtick_pos = np.asarray(np.round(np.linspace( + start=0, + stop=data.shape[1] - 1, + num=nticks, + endpoint=True + )), dtype=int) + xtick_lab = [str(np.round(xcoord[np.argmin(np.abs(xcoord - xcoord[i]))], 2)) + for i in xtick_pos] + ax.set_xticks(xtick_pos) + ax.set_xticklabels(xtick_lab) + ax.set_xlabel("continuous") + plt.yticks(np.arange(len(genes)), gene_ids, rotation='horizontal') + ax.set_ylabel("genes") + + # Save, show and return figure. 
+ if save is not None: + plt.savefig(save + '_genes.png') + + if show: + plt.show() + + plt.close(fig) + plt.ion() + + if return_axs: + return ax + else: + return + + +class DifferentialExpressionTestWaldCont(_DifferentialExpressionTestCont): + de_test: DifferentialExpressionTestWald + + def __init__( + self, + de_test: DifferentialExpressionTestWald, + size_factors: np.ndarray, + continuous_coords: np.ndarray, + spline_coefs: list, + interpolated_spline_basis: np.ndarray, + noise_model: str + ): + super(DifferentialExpressionTestWaldCont, self).__init__( + de_test=de_test, + model_estim=de_test.model_estim, + size_factors=size_factors, + continuous_coords=continuous_coords, + spline_coefs=spline_coefs, + interpolated_spline_basis=interpolated_spline_basis, + noise_model=noise_model + ) + + +class DifferentialExpressionTestLRTCont(_DifferentialExpressionTestCont): + de_test: DifferentialExpressionTestLRT + + def __init__( + self, + de_test: DifferentialExpressionTestLRT, + size_factors: np.ndarray, + continuous_coords: np.ndarray, + spline_coefs: list, + interpolated_spline_basis: np.ndarray, + noise_model: str + ): + super().__init__( + de_test=de_test, + model_estim=de_test.full_estim, + size_factors=size_factors, + continuous_coords=continuous_coords, + spline_coefs=spline_coefs, + interpolated_spline_basis=interpolated_spline_basis, + noise_model=noise_model + ) diff --git a/diffxpy/testing/det_pair.py b/diffxpy/testing/det_pair.py new file mode 100644 index 0000000..3ddd175 --- /dev/null +++ b/diffxpy/testing/det_pair.py @@ -0,0 +1,622 @@ +import abc +try: + import anndata +except ImportError: + anndata = None +import batchglm.api as glm +import logging +import numpy as np +import pandas as pd +from typing import List, Union + +from ..stats import stats +from . import correction + +from .det import _DifferentialExpressionTestMulti + +logger = logging.getLogger("diffxpy") + + +class _DifferentialExpressionTestPairwiseBase(_DifferentialExpressionTestMulti): + """ + Pairwise differential expression tests base class. + + Defines API of accessing test results of pairs of groups. The underlying accessors depend + on the type of test and are defined in the subclasses. 
+ """ + + groups: List[str] + _pval: Union[None, np.ndarray] + _qval: Union[None, np.ndarray] + _logfc: Union[None, np.ndarray] + + def _get_group_idx(self, groups0, groups1): + if np.any([x not in self.groups for x in groups0]): + raise ValueError('element of groups1 not recognized; not in groups %s' % self.groups) + if np.any([x not in self.groups for x in groups1]): + raise ValueError('element of groups2 not recognized; not in groups %s' % self.groups) + + return np.array([self.groups.index(x) for x in groups0]), \ + np.array([self.groups.index(x) for x in groups1]) + + def _correction_pairs(self, idx0, idx1, method): + if self._correction_type.lower() == "global": + pval = self._pval_pairs(idx0=idx0, idx1=idx1) + pval_reshape = np.reshape(pval, -1) + qvals = correction.correct(pvals=pval_reshape, method=method) + qvals = np.reshape(qvals, pval.shape) + elif self._correction_type.lower() == "by_test": + qvals = np.apply_along_axis( + func1d=lambda pv: correction.correct(pvals=pv, method=method), + axis=-1, + arr=self._pval_pairs(idx0=idx0, idx1=idx1) + ) + return qvals + + def log_fold_change(self, base=np.e, **kwargs): + if base == np.e: + return self._logfc + else: + return self._logfc / np.log(base) + + @abc.abstractmethod + def _pval_pairs(self, idx0, idx1): + """ + Test-specific p-values accessor for the comparison of groups1 to groups2. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :return: p-values + """ + pass + + def _qval_pairs(self, idx0, idx1, method="fdr_bh"): + """ + Test-specific q-values accessor for the comparison of both sets of groups. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :param method: Multiple testing correction method. + Browse available methods in the annotation of statsmodels.stats.multitest.multipletests(). + :return: q-values + """ + return self._correction_pairs(idx0=idx0, idx1=idx1, method=method) + + @abc.abstractmethod + def _log_fold_change_pairs(self, idx0, idx1, base): + """ + Test-specific log fold change-values accessor for the comparison of groups1 to groups2. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :param base: Base of logarithm. + :return: log fold change values + """ + pass + + def pval_pairs(self, groups0, groups1): + """ + Get p-values of the comparison of groups1 to groups2. + + :param groups0: List of identifier of first set of group of observations in pair-wise comparison. + :param groups1: List of identifier of second set of group of observations in pair-wise comparison. + :return: p-values + """ + idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1) + return self._pval_pairs(idx0=idx0, idx1=idx1) + + def qval_pairs(self, groups0, groups1, method="fdr_bh"): + """ + Get q-values of the comparison of groups1 to groups2. + + :param groups0: List of identifier of first set of group of observations in pair-wise comparison. + :param groups1: List of identifier of second set of group of observations in pair-wise comparison. + :param method: Multiple testing correction method. + Browse available methods in the annotation of statsmodels.stats.multitest.multipletests(). 
+    def log10_pval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
+        """
+        Return log10 transformed and cleaned p-values.
+
+        NaN p-values are set to one and p-values below log10_threshold
+        in log10 space are set to log10_threshold.
+
+        :param groups0: List of identifiers of the first set of groups in the pair-wise comparison.
+        :param groups1: List of identifiers of the second set of groups in the pair-wise comparison.
+        :param log10_threshold: minimal log10 p-value to return.
+        :return: Cleaned log10 transformed p-values.
+        """
+        pvals = np.reshape(self.pval_pairs(groups0=groups0, groups1=groups1), -1)
+        pvals = np.nextafter(0, 1, out=pvals, where=pvals == 0)
+        log10_pval_clean = np.log(pvals) / np.log(10)
+        log10_pval_clean[np.isnan(log10_pval_clean)] = 1
+        log10_pval_clean = np.clip(log10_pval_clean, log10_threshold, 0, log10_pval_clean)
+        return log10_pval_clean
+
+    def log10_qval_pairs_clean(self, groups0, groups1, log10_threshold=-30):
+        """
+        Return log10 transformed and cleaned q-values.
+
+        NaN q-values are set to one and q-values below log10_threshold
+        in log10 space are set to log10_threshold.
+
+        :param groups0: List of identifiers of the first set of groups in the pair-wise comparison.
+        :param groups1: List of identifiers of the second set of groups in the pair-wise comparison.
+        :param log10_threshold: minimal log10 q-value to return.
+        :return: Cleaned log10 transformed q-values.
+        """
+        qvals = np.reshape(self.qval_pairs(groups0=groups0, groups1=groups1), -1)
+        qvals = np.nextafter(0, 1, out=qvals, where=qvals == 0)
+        log10_qval_clean = np.log(qvals) / np.log(10)
+        log10_qval_clean[np.isnan(log10_qval_clean)] = 1
+        log10_qval_clean = np.clip(log10_qval_clean, log10_threshold, 0, log10_qval_clean)
+        return log10_qval_clean
+
+    def log_fold_change_pairs(
+            self,
+            groups0,
+            groups1,
+            base=np.e
+    ):
+        """
+        Get log fold changes of the comparison of groups0 to groups1.
+
+        :param groups0: List of identifiers of the first set of groups in the pair-wise comparison.
+        :param groups1: List of identifiers of the second set of groups in the pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold changes
+        """
+        idx0, idx1 = self._get_group_idx(groups0=groups0, groups1=groups1)
+        return self._log_fold_change_pairs(idx0=idx0, idx1=idx1, base=base)
+
+    def summary(
+            self,
+            qval_thres=None,
+            fc_upper_thres=None,
+            fc_lower_thres=None,
+            mean_thres=None,
+            **kwargs
+    ) -> pd.DataFrame:
+        """
+        Summarize differential expression results into an output table.
+
+        :param qval_thres: Upper bound of corrected p-values for gene to be included.
+        :param fc_upper_thres: Upper bound of fold-change for gene to be included.
+        :param fc_lower_thres: Lower bound of fold-change for gene to be included.
+        :param mean_thres: Lower bound of average expression for gene to be included.
+        :return: Summary table of differential expression test.
+ """ + res = super(_DifferentialExpressionTestMulti, self).summary(**kwargs) + + return self._threshold_summary( + res=res, + qval_thres=qval_thres, + fc_upper_thres=fc_upper_thres, + fc_lower_thres=fc_lower_thres, + mean_thres=mean_thres + ) + + def summary_pairs( + self, + groups0, + groups1, + qval_thres=None, + fc_upper_thres=None, + fc_lower_thres=None, + mean_thres=None, + method="fdr_bh" + ) -> pd.DataFrame: + """ + Summarize differential expression results into an output table. + + :param groups0: List of identifier of first set of group of observations in pair-wise comparison. + :param groups1: List of identifier of second set of group of observations in pair-wise comparison. + :param qval_thres: Upper bound of corrected p-values for gene to be included. + :param fc_upper_thres: Upper bound of fold-change for gene to be included. + :param fc_lower_thres: Lower bound of fold-change p-values for gene to be included. + :param mean_thres: Lower bound of average expression for gene to be included. + :param method: Multiple testing correction method. + Browse available methods in the annotation of statsmodels.stats.multitest.multipletests(). + :return: pandas.DataFrame with the following columns: + - gene: the gene id's + - pval: the minimum per-gene p-value of all tests + - qval: the minimum per-gene q-value of all tests + - log2fc: the maximal/minimal (depending on which one is higher) log2 fold change of the genes + - mean: the mean expression of the gene across all groups + """ + assert self.gene_ids is not None + + pval = self.pval_pairs(groups0=groups0, groups1=groups1) + qval = self.qval_pairs(groups0=groups0, groups1=groups1, method=method) + + # calculate maximum logFC of lower triangular fold change matrix + raw_logfc = self.log_fold_change_pairs(groups0=groups0, groups1=groups1, base=2) + + # first flatten all dimensions up to the last 'gene' dimension + flat_logfc = raw_logfc.reshape(-1, raw_logfc.shape[-1]) + # next, get argmax of flattened logfc and unravel the true indices from it + r, c = np.unravel_index(flat_logfc.argmax(0), raw_logfc.shape[:2]) + # if logfc is maximal in the lower triangular matrix, multiply it with -1 + logfc = raw_logfc[r, c, np.arange(raw_logfc.shape[-1])] * np.where(r <= c, 1, -1) + + res = pd.DataFrame({ + "gene": self.gene_ids, + "pval": np.min(pval, axis=(0, 1)), + "qval": np.min(qval, axis=(0, 1)), + "log2fc": np.asarray(logfc), + "mean": np.asarray(self.mean) + }) + + res = self._threshold_summary( + res=res, + qval_thres=qval_thres, + fc_upper_thres=fc_upper_thres, + fc_lower_thres=fc_lower_thres, + mean_thres=mean_thres + ) + + return res + + +class DifferentialExpressionTestPairwiseStandard(_DifferentialExpressionTestPairwiseBase): + """ + Pairwise differential expression tests class for tests other than z-tests. + """ + + def __init__( + self, + gene_ids, + pval, + logfc, + ave, + groups, + tests, + correction_type: str + ): + super().__init__(correction_type=correction_type) + self._gene_ids = np.asarray(gene_ids) + self._logfc = logfc + self._pval = pval + self._mean = np.asarray(ave).flatten() + self.groups = list(np.asarray(groups)) + self._tests = tests + + _ = self.qval + + @property + def gene_ids(self) -> np.ndarray: + return self._gene_ids + + @property + def x(self): + return None + + @property + def tests(self): + """ + If `keep_full_test_objs` was set to `True`, this will return a matrix of differential expression tests. 
+ """ + if self._tests is None: + raise ValueError("Individual tests were not kept!") + + return self._tests + + def _pval_pairs(self, idx0, idx1): + """ + Test-specific p-values accessor for the comparison of groups1 to groups2. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :return: p-values + """ + assert np.all([x < self._pval.shape[1] for x in idx0]) + assert np.all([x < self._pval.shape[1] for x in idx1]) + return self._pval[idx0, :, :][:, idx1, :] + + def _log_fold_change_pairs(self, idx0, idx1, base): + """ + Test-specific log fold change-values accessor for the comparison of groups1 to groups2. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :param base: Base of logarithm. + :return: log fold change values + """ + assert np.all([x < self._pval.shape[1] for x in idx0]) + assert np.all([x < self._pval.shape[1] for x in idx1]) + if base == np.e: + return self._logfc[idx0, :, :][:, idx1, :] + else: + return self._logfc[idx0, :, :][:, idx1, :] / np.log(base) + + +class DifferentialExpressionTestZTest(_DifferentialExpressionTestPairwiseBase): + """ + Pairwise unit_test between more than 2 groups per gene. This close harbors tests that are precomputed + across all pairs of groups. DifferentialExpressionTestZTestLazy is the alternative class that only allows + lazy test evaluation. + """ + + model_estim: glm.typing.EstimatorBaseTyping + theta_mle: np.ndarray + theta_sd: np.ndarray + + def __init__( + self, + model_estim: glm.typing.EstimatorBaseTyping, + grouping, + groups, + correction_type: str + ): + super().__init__(correction_type=correction_type) + self.model_estim = model_estim + self.grouping = grouping + self.groups = list(np.asarray(groups)) + + # Values of parameter estimates: coefficients x genes array with one coefficient per group + self._theta_mle = model_estim.a_var + # Standard deviation of estimates: coefficients x genes array with one coefficient per group + # Need .copy() here as nextafter needs mutabls copy. + theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy() + theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf)) + self._theta_sd = np.sqrt(theta_sd) + self._logfc = None + + # Call tests in constructor. 
+    def _test(self, **kwargs):
+        pvals = np.tile(np.NaN, [len(self.groups), len(self.groups), self.model_estim.x.shape[1]])
+        for i, g1 in enumerate(self.groups):
+            for j, g2 in enumerate(self.groups[(i + 1):]):
+                j = j + i + 1
+                pvals[i, j, :] = stats.two_coef_z_test(
+                    theta_mle0=self._theta_mle[i, :],
+                    theta_mle1=self._theta_mle[j, :],
+                    theta_sd0=self._theta_sd[i, :],
+                    theta_sd1=self._theta_sd[j, :]
+                )
+                pvals[j, i, :] = pvals[i, j, :]
+
+        return pvals
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return np.asarray(self.model_estim.input_data.features)
+
+    @property
+    def x(self):
+        return self.model_estim.x
+
+    @property
+    def log_likelihood(self):
+        return self.model_estim.log_likelihood
+
+    @property
+    def model_gradient(self):
+        return self.model_estim.jacobian
+
+    def _ave(self):
+        """
+        Returns the mean expression by gene.
+
+        :return: np.ndarray
+        """
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
+
+    def _pval_pairs(self, idx0, idx1):
+        """
+        Test-specific p-values accessor for the comparison of groups0 to groups1.
+
+        :param idx0: List of indices of the first set of groups in the pair-wise comparison.
+        :param idx1: List of indices of the second set of groups in the pair-wise comparison.
+        :return: p-values
+        """
+        assert np.all([x < self.pval.shape[1] for x in idx0])
+        assert np.all([x < self.pval.shape[1] for x in idx1])
+        return self.pval[idx0, :, :][:, idx1, :]
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of groups0 to groups1.
+
+        :param idx0: List of indices of the first set of groups in the pair-wise comparison.
+        :param idx1: List of indices of the second set of groups in the pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        logfc = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                # Each pair is filled directly; the block is not square in general,
+                # so no mirrored assignment is possible here.
+                logfc[i, j, :] = self._theta_mle[xj, :] - self._theta_mle[xi, :]
+
+        if base == np.e:
+            return logfc
+        else:
+            return logfc / np.log(base)
+
+
+class _DifferentialExpressionTestPairwiseLazyBase(_DifferentialExpressionTestPairwiseBase):
+    """
+    Lazy pairwise differential expression tests base class.
+
+    In addition to the methods of a standard pairwise test, this base class throws errors for attributes
+    that are not accessible in a lazy pairwise test.
+    """
+
+    def _test(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @property
+    def pval(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+
+    @property
+    def qval(self, **kwargs):
+        """
+        This function is not available in lazy results evaluation as it would
+        require all pairwise tests to be performed.
+        """
+        raise ValueError("This function is not available in lazy results evaluation as it would "
+                         "require all pairwise tests to be performed.")
+ """ + raise ValueError("This function is not available in lazy results evaluation as it would " + "require all pairwise tests to be performed.") + + def log_fold_change(self, base=np.e, **kwargs): + """ + This function is not available in lazy results evaluation as it would + require all pairwise tests to be performed. + """ + raise ValueError("This function is not available in lazy results evaluation as it would " + "require all pairwise tests to be performed.") + + @property + def summary(self, **kwargs): + """ + This function is not available in lazy results evaluation as it would + require all pairwise tests to be performed. + """ + raise ValueError("This function is not available in lazy results evaluation as it would " + "require all pairwise tests to be performed.") + + @abc.abstractmethod + def _test_pairs(self, idx0, idx1): + """ + Run differential expression tests on selected pairs of groups. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :return: p-values + """ + pass + + def _pval_pairs(self, idx0, idx1): + """ + Test-specific p-values accessor for the comparison of groups0 to groups1. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. + :return: p-values + """ + return self._test_pairs(idx0=idx0, idx1=idx1) + + +class DifferentialExpressionTestZTestLazy(_DifferentialExpressionTestPairwiseLazyBase): + """ + Pairwise unit_test between more than 2 groups per gene with lazy evaluation. + + This class performs pairwise tests upon enquiry only and does not store them + and is therefore suited so very large group sets for which the lfc and + p-value matrices of the size [genes, groups, groups] are too big to fit into + memory. + """ + + model_estim: glm.typing.EstimatorBaseTyping + _theta_mle: np.ndarray + _theta_sd: np.ndarray + + def __init__( + self, + model_estim: glm.typing.EstimatorBaseTyping, + grouping, groups, + correction_type="global" + ): + _DifferentialExpressionTestMulti.__init__( + self=self, + correction_type=correction_type + ) + self.model_estim = model_estim + self.grouping = grouping + if isinstance(groups, list): + self.groups = groups + else: + self.groups = groups.tolist() + + # Values of parameter estimates: coefficients x genes array with one coefficient per group + self._theta_mle = model_estim.a_var + # Standard deviation of estimates: coefficients x genes array with one coefficient per group + # Need .copy() here as nextafter needs mutabls copy. + theta_sd = np.diagonal(model_estim.fisher_inv, axis1=-2, axis2=-1).T.copy() + theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf)) + self._theta_sd = np.sqrt(theta_sd) + + def _test_pairs(self, idx0, idx1): + """ + Run differential expression tests on selected pairs of groups. + + :param idx0: List of indices of first set of group of observations in pair-wise comparison. + :param idx1: List of indices of second set of group of observations in pair-wise comparison. 
+    def _test_pairs(self, idx0, idx1):
+        """
+        Run differential expression tests on selected pairs of groups.
+
+        :param idx0: List of indices of the first set of groups in the pair-wise comparison.
+        :param idx1: List of indices of the second set of groups in the pair-wise comparison.
+        :return: p-values
+        """
+        pvals = np.tile(np.NaN, [len(idx0), len(idx1), self.model_estim.x.shape[1]])
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                # Compare the underlying group indices, not the loop positions:
+                # the same group may occur at different positions in idx0 and idx1.
+                if xi != xj:
+                    pvals[i, j, :] = stats.two_coef_z_test(
+                        theta_mle0=self._theta_mle[xi, :],
+                        theta_mle1=self._theta_mle[xj, :],
+                        theta_sd0=self._theta_sd[xi, :],
+                        theta_sd1=self._theta_sd[xj, :]
+                    )
+                else:
+                    pvals[i, j, :] = np.array([1.])
+
+        return pvals
+
+    @property
+    def gene_ids(self) -> np.ndarray:
+        return np.asarray(self.model_estim.input_data.features)
+
+    @property
+    def x(self):
+        return self.model_estim.x
+
+    @property
+    def log_likelihood(self):
+        return self.model_estim.log_likelihood
+
+    @property
+    def model_gradient(self):
+        return self.model_estim.jacobian
+
+    def _ave(self):
+        """
+        Returns the mean expression by gene.
+
+        :return: np.ndarray
+        """
+        return np.asarray(np.mean(self.model_estim.x, axis=0)).flatten()
+
+    def _log_fold_change_pairs(self, idx0, idx1, base):
+        """
+        Test-specific log fold change-values accessor for the comparison of both sets of groups.
+
+        :param idx0: List of indices of the first set of groups in the pair-wise comparison.
+        :param idx1: List of indices of the second set of groups in the pair-wise comparison.
+        :param base: Base of logarithm.
+        :return: log fold change values
+        """
+        logfc = np.zeros(shape=(len(idx0), len(idx1), self._theta_mle.shape[1]))
+        for i, xi in enumerate(idx0):
+            for j, xj in enumerate(idx1):
+                logfc[i, j, :] = self._theta_mle[xi, :] - self._theta_mle[xj, :]
+
+        if base == np.e:
+            return logfc
+        else:
+            return logfc / np.log(base)
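
For context on the `backend` argument that the changes to tests.py below thread through all test entry points, a usage sketch (data and sample_description are placeholders):

    import numpy as np
    import pandas as pd
    import diffxpy.api as de

    # Placeholder inputs for illustration.
    data = np.random.negative_binomial(5, 0.5, size=(200, 100))
    sample_description = pd.DataFrame({"condition": ["a", "b"] * 100})

    test = de.test.wald(
        data=data,
        formula_loc="~ 1 + condition",
        factor_loc_totest="condition",
        sample_description=sample_description,
        backend="numpy"  # default; "tf1" selects the tensorflow 1.x estimators
    )
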
diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py
index 3242e09..a8ac7c8 100644
--- a/diffxpy/testing/tests.py
+++ b/diffxpy/testing/tests.py
@@ -12,14 +12,14 @@ from typing import Union, List, Dict, Callable, Tuple
 
 from diffxpy import pkg_constants
-from diffxpy.models.batch_bfgs.optim import Estim_BFGS
 from .det import DifferentialExpressionTestLRT, DifferentialExpressionTestWald, \
     DifferentialExpressionTestTT, DifferentialExpressionTestRank, _DifferentialExpressionTestSingle, \
-    DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, DifferentialExpressionTestPairwise, \
-    DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition, \
-    DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
+    DifferentialExpressionTestVsRest, _DifferentialExpressionTestMulti, DifferentialExpressionTestByPartition
+from .det_cont import DifferentialExpressionTestWaldCont, DifferentialExpressionTestLRTCont
+from .det_pair import DifferentialExpressionTestZTestLazy, DifferentialExpressionTestZTest, \
+    DifferentialExpressionTestPairwiseStandard
 from .utils import parse_gene_names, parse_sample_description, parse_size_factors, parse_grouping, \
-    constraint_system_from_star
+    constraint_system_from_star, preview_coef_names
 
 
 def _fit(
@@ -27,6 +27,8 @@ def _fit(
     data,
     design_loc,
     design_scale,
+    design_loc_names: list = None,
+    design_scale_names: list = None,
     constraints_loc: np.ndarray = None,
     constraints_scale: np.ndarray = None,
     init_model=None,
@@ -35,11 +37,12 @@ def _fit(
     gene_names=None,
     size_factors=None,
     batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     quick_scale: bool = None,
     close_session=True,
     dtype="float64"
-) -> glm.typing.InputDataBase:
+):
     """
     :param noise_model: str, noise model to use in model-based unit_test.
         Possible options:
@@ -86,24 +89,15 @@ def _fit(
     :param size_factors: 1D array of transformed library size factors for each cell in the same order as in data
     :param batch_size: the batch size to use for the estimator
-    :param training_strategy: {str, function, list} training strategy to use. Can be:
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
 
-        - str: will use Estimator.TrainingStrategy[training_strategy] to train
-        - function: Can be used to implement custom training function will be called as
-          `training_strategy(estimator)`.
-        - list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
-          method arguments.
-
-          Example:
-
-          .. code-block:: python
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
+    :param training_strategy: {str} training strategy to use. Can be:
 
-              [
-                {"learning_rate": 0.5, },
-                {"learning_rate": 0.05, },
-              ]
-
-          This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
+        - str: will use Estimator.TrainingStrategy[training_strategy] to train
     :param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
 
         Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
@@ -125,62 +119,57 @@ def _fit(
         "irls_gd_tr": pkg_constants.BATCHGLM_OPTIM_IRLS_GD_TR
     }
 
-    if isinstance(training_strategy, str) and training_strategy.lower() == 'bfgs':
-        assert False, "depreceated"
-        lib_size = np.zeros(data.shape[0])
+    if backend.lower() in ["tf1"]:
         if noise_model == "nb" or noise_model == "negative_binomial":
-            estim = Estim_BFGS(X=data, design_loc=design_loc, design_scale=design_scale,
-                               lib_size=lib_size, batch_size=batch_size, feature_names=gene_names)
-            estim.run(nproc=3, maxiter=10000, debug=False)
-            model = estim.return_batchglm_formated_model()
+            from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM
+        elif noise_model == "norm" or noise_model == "normal":
+            from batchglm.api.models.tf1.glm_norm import Estimator, InputDataGLM
         else:
-            raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
-    else:
+            raise ValueError('noise_model="%s" not recognized.' % noise_model)
+    elif backend.lower() in ["numpy"]:
+        if isinstance(training_strategy, str):
+            if training_strategy.lower() == "auto":
+                training_strategy = "DEFAULT"
         if noise_model == "nb" or noise_model == "negative_binomial":
-            from batchglm.api.models.glm_nb import Estimator, InputDataGLM
-        elif noise_model == "norm" or noise_model == "normal":
-            from batchglm.api.models.glm_norm import Estimator, InputDataGLM
+            from batchglm.api.models.numpy.glm_nb import Estimator, InputDataGLM
         else:
-            raise ValueError('base.test(): `noise_model="%s"` not recognized.' % noise_model)
+            raise ValueError('noise_model="%s" not recognized.' % noise_model)
+    else:
+        raise ValueError('backend="%s" not recognized.'
+                         % backend)
 
-        input_data = InputDataGLM(
-            data=data,
-            design_loc=design_loc,
-            design_scale=design_scale,
-            constraints_loc=constraints_loc,
-            constraints_scale=constraints_scale,
-            size_factors=size_factors,
-            feature_names=gene_names,
-        )
+    input_data = InputDataGLM(
+        data=data,
+        design_loc=design_loc,
+        design_scale=design_scale,
+        design_loc_names=design_loc_names,
+        design_scale_names=design_scale_names,
+        constraints_loc=constraints_loc,
+        constraints_scale=constraints_scale,
+        size_factors=size_factors,
+        feature_names=gene_names,
+    )
 
-        constructor_args = {}
-        if batch_size is not None:
-            constructor_args["batch_size"] = batch_size
-        if quick_scale is not None:
-            constructor_args["quick_scale"] = quick_scale
-        estim = Estimator(
-            input_data=input_data,
-            init_model=init_model,
-            init_a=init_a,
-            init_b=init_b,
-            provide_optimizers=provide_optimizers,
-            provide_batched=pkg_constants.BATCHGLM_PROVIDE_BATCHED,
-            provide_fim=pkg_constants.BATCHGLM_PROVIDE_FIM,
-            provide_hessian=pkg_constants.BATCHGLM_PROVIDE_HESSIAN,
-            dtype=dtype,
-            **constructor_args
-        )
-        estim.initialize()
+    constructor_args = {}
+    if batch_size is not None:
+        constructor_args["batch_size"] = batch_size
+    if quick_scale is not None:
+        constructor_args["quick_scale"] = quick_scale
+    estim = Estimator(
+        input_data=input_data,
+        init_a=init_a,
+        init_b=init_b,
+        provide_optimizers=provide_optimizers,
+        provide_batched=pkg_constants.BATCHGLM_PROVIDE_BATCHED,
+        provide_fim=pkg_constants.BATCHGLM_PROVIDE_FIM,
+        provide_hessian=pkg_constants.BATCHGLM_PROVIDE_HESSIAN,
+        dtype=dtype,
+        **constructor_args
+    )
+    estim.initialize()
+    estim.train_sequence(training_strategy=training_strategy)
 
-        # Training:
-        if callable(training_strategy):
-            # call training_strategy if it is a function
-            training_strategy(estim)
-        else:
-            estim.train_sequence(training_strategy=training_strategy)
-
-        if close_session:
-            estim.finalize()
+    if close_session:
+        estim.finalize()
 
     return estim
 
 
@@ -199,6 +188,7 @@ def lrt(
     noise_model="nb",
     size_factors: Union[np.ndarray, pd.core.series.Series, np.ndarray] = None,
     batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     quick_scale: bool = False,
     dtype="float64",
@@ -213,16 +203,12 @@ def lrt(
 
     :param data: Input data matrix (observations x features) or (cells x genes).
     :param full_formula_loc: formula
         Full model formula for location parameter model.
-        If not specified, `full_formula` will be used instead.
     :param reduced_formula_loc: formula
         Reduced model formula for location and scale parameter models.
-        If not specified, `reduced_formula` will be used instead.
     :param full_formula_scale: formula
         Full model formula for scale parameter model.
-        If not specified, `reduced_formula_scale` will be used instead.
     :param reduced_formula_scale: formula
         Reduced model formula for scale parameter model.
-        If not specified, `reduced_formula` will be used instead.
     :param as_numeric:
         Which columns of sample_description to treat as numeric and
        not as categorical. This yields columns in the design matrix
@@ -256,6 +242,12 @@ def lrt(
         same order as in data or string-type column identifier of size-factor containing column in sample
         description.
     :param batch_size: the batch size to use for the estimator
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
        - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -334,6 +326,7 @@ def lrt(
         gene_names=gene_names,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
@@ -352,6 +345,7 @@ def lrt(
         init_model=reduced_model,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
@@ -387,6 +381,7 @@ def wald(
     noise_model: str = "nb",
     size_factors: Union[np.ndarray, pd.core.series.Series, str] = None,
     batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     quick_scale: bool = False,
     dtype="float64",
@@ -406,10 +401,8 @@ def wald(
         the exact coefficients which are to be tested.
     :param formula_loc: formula
         model formula for location and scale parameter models.
-        If not specified, `formula` will be used instead.
     :param formula_scale: formula
         model formula for scale parameter model.
-        If not specified, `formula` will be used instead.
     :param as_numeric:
         Which columns of sample_description to treat as numeric and
         not as categorical. This yields columns in the design matrix
@@ -513,6 +506,12 @@ def wald(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
        - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -557,8 +556,8 @@ def wald(
         sample_description=sample_description
     )
 
-    logging.getLogger("diffxpy").debug("building location model")
-    design_loc, constraints_loc = constraint_system_from_star(
+    # Build design matrices and constraints.
+    design_loc, design_loc_names, constraints_loc, term_names_loc = constraint_system_from_star(
         dmat=dmat_loc,
         sample_description=sample_description,
         formula=formula_loc,
@@ -566,8 +565,7 @@ def wald(
         constraints=constraints_loc,
         return_type="patsy"
     )
-    logging.getLogger("diffxpy").debug("building scale model")
-    design_scale, constraints_scale = constraint_system_from_star(
+    design_scale, design_scale_names, constraints_scale, term_names_scale = constraint_system_from_star(
         dmat=dmat_scale,
         sample_description=sample_description,
         formula=formula_scale,
@@ -578,13 +576,20 @@ def wald(
 
     # Define indices of coefficients to test:
     constraints_loc_temp = constraints_loc if constraints_loc is not None else np.eye(design_loc.shape[-1])
+    # Check that design_loc is patsy, otherwise use term_names for slicing.
     if factor_loc_totest is not None:
-        # Select coefficients to test via formula model:
-        col_indices = np.concatenate([
-            np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
-            for x in factor_loc_totest
-        ])
-        assert col_indices.size > 0, "Could not find any matching columns!"
+        if not isinstance(design_loc, patsy.design_info.DesignMatrix):
+            col_indices = np.where([
+                x in factor_loc_totest
+                for x in term_names_loc
+            ])[0]
+        else:
+            # Select coefficients to test via formula model:
+            col_indices = np.concatenate([
+                np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
+                for x in factor_loc_totest
+            ])
+        assert len(col_indices) > 0, "Could not find any matching columns!"
         if coef_to_test is not None:
             if len(factor_loc_totest) > 1:
                 raise ValueError("do not set coef_to_test if more than one factor_loc_totest is given")
@@ -595,8 +600,14 @@ def wald(
             design_loc[:, col_indices] = np.where(samples, 1, 0)
     elif coef_to_test is not None:
         # Directly select coefficients to test from design matrix:
-        # Check that coefficients to test are not dependent parameters if constraints are given:
-        coef_loc_names = glm.data.view_coef_names(design_loc).tolist()
+        if sample_description is not None:
+            coef_loc_names = preview_coef_names(
+                sample_description=sample_description,
+                formula=formula_loc,
+                as_numeric=as_numeric
+            )
+        else:
+            coef_loc_names = dmat_loc.columns.tolist()
         if not np.all([x in coef_loc_names for x in coef_to_test]):
             raise ValueError(
-                "the requested test coefficients %s were found in model coefficients %s" %
+                "the requested test coefficients %s were not all found in model coefficients %s" %
@@ -610,16 +621,19 @@ def wald(
         raise ValueError("either set factor_loc_totest or coef_to_test")
     # Check that all tested coefficients are independent:
     for x in col_indices:
-        if np.sum(constraints_loc_temp[x,:]) != 1:
+        if np.sum(constraints_loc_temp[x, :]) != 1:
             raise ValueError("Constraints input is wrong: not all tested coefficients are unconstrained.")
     # Adjust tested coefficients from dependent to independent (fitted) parameters:
-    col_indices = np.array([np.where(constraints_loc_temp[x,:] == 1)[0][0] for x in col_indices])
+    col_indices = np.array([np.where(constraints_loc_temp[x, :] == 1)[0][0] for x in col_indices])
 
+    # Fit model.
     model = _fit(
         noise_model=noise_model,
         data=data,
         design_loc=design_loc,
         design_scale=design_scale,
+        design_loc_names=design_loc_names,
+        design_scale_names=design_scale_names,
         constraints_loc=constraints_loc,
         constraints_scale=constraints_scale,
         init_a=init_a,
@@ -627,19 +641,20 @@ def wald(
         gene_names=gene_names,
         size_factors=size_factors,
         batch_size=batch_size,
+        backend=backend,
         training_strategy=training_strategy,
         quick_scale=quick_scale,
         dtype=dtype,
         **kwargs,
     )
 
+    # Prepare differential expression test.
     de_test = DifferentialExpressionTestWald(
         model_estim=model,
         col_indices=col_indices,
         noise_model=noise_model,
         sample_description=sample_description
     )
-
     return de_test
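
The coefficient re-mapping at the end of the wald body above picks, for each tested design column, the fitted parameter it loads on, after rejecting columns that are constrained combinations of several parameters. The same logic in isolation (toy constraint matrix, hypothetical values):

    import numpy as np

    # Toy constraint matrix: 4 design columns mapped onto 3 fitted parameters;
    # each tested column must load on exactly one fitted parameter.
    constraints_loc = np.array([
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
    ])
    col_indices = np.array([2, 3])

    assert all(np.sum(constraints_loc[x, :]) == 1 for x in col_indices)
    mapped = np.array([np.where(constraints_loc[x, :] == 1)[0][0] for x in col_indices])
    # mapped == array([2, 1])
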
@@ -737,6 +752,7 @@ def two_sample(
     noise_model: str = None,
     size_factors: np.ndarray = None,
     batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     is_sig_zerovar: bool = True,
     quick_scale: bool = None,
@@ -800,6 +816,12 @@ def two_sample(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
        - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -851,6 +873,7 @@ def two_sample(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -877,6 +900,7 @@ def two_sample(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -913,6 +937,7 @@ def pairwise(
     noise_model: str = "nb",
     size_factors: np.ndarray = None,
     batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     is_sig_zerovar: bool = True,
     quick_scale: bool = False,
@@ -988,6 +1013,12 @@ def pairwise(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
+
+        - "numpy" numpy
+        - "tf1" tensorflow1.* >= 1.13
+        - "tf2" tensorflow2.*
     :param training_strategy: {str, function, list} training strategy to use. Can be:
 
        - str: will use Estimator.TrainingStrategy[training_strategy] to train
@@ -1041,6 +1072,7 @@ def pairwise(
             init_a="closed_form",
             init_b="closed_form",
             batch_size=batch_size,
+            backend=backend,
             training_strategy=training_strategy,
             quick_scale=quick_scale,
             dtype=dtype,
@@ -1095,6 +1127,7 @@ def pairwise(
                     noise_model=noise_model,
                     size_factors=size_factors[idx] if size_factors is not None else None,
                     batch_size=batch_size,
+                    backend=backend,
                     training_strategy=training_strategy,
                     quick_scale=quick_scale,
                     is_sig_zerovar=is_sig_zerovar,
@@ -1109,7 +1142,7 @@ def pairwise(
                 tests[i, j] = de_test_temp
                 tests[j, i] = de_test_temp
 
-        de_test = DifferentialExpressionTestPairwise(
+        de_test = DifferentialExpressionTestPairwiseStandard(
             gene_ids=gene_names,
             pval=pvals,
             logfc=logfc,
@@ -1132,6 +1165,7 @@ def versus_rest(
     noise_model: str = None,
     size_factors: np.ndarray = None,
    batch_size: int = None,
+    backend: str = "numpy",
     training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
     is_sig_zerovar: bool = True,
     quick_scale: bool = None,
@@ -1206,6 +1240,12 @@ def versus_rest(
 
         - 'nb': default
     :param batch_size: The batch size to use for the estimator.
+    :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers /
+        training strategies. Available are:
Available are: + + - "numpy" numpy + - "tf1" tensorflow1.* >= 1.13 + - "tf2" tensorflow2.* :param training_strategy: {str, function, list} training strategy to use. Can be: - str: will use Estimator.TrainingStrategy[training_strategy] to train @@ -1419,6 +1467,7 @@ def two_sample( noise_model=noise_model, size_factors=size_factors[idx] if size_factors is not None else None, batch_size=batch_size, + backend=backend, training_strategy=training_strategy, is_sig_zerovar=is_sig_zerovar, **kwargs @@ -1517,6 +1566,7 @@ def lrt( size_factors: np.ndarray = None, noise_model="nb", batch_size: int = None, + backend: str = "numpy", training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO", **kwargs ): @@ -1569,6 +1619,12 @@ def lrt( - 'nb': default :param batch_size: The batch size to use for the estimator. + :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers / + training strategies. Available are: + + - "numpy" numpy + - "tf1" tensorflow1.* >= 1.13 + - "tf2" tensorflow2.* :param training_strategy: {str, function, list} training strategy to use. Can be: - str: will use Estimator.TrainingStrategy[training_strategy] to train @@ -1594,6 +1650,7 @@ def lrt( noise_model=noise_model, size_factors=size_factors[idx] if size_factors is not None else None, batch_size=batch_size, + backend=backend, training_strategy=training_strategy, **kwargs )) @@ -1615,6 +1672,7 @@ def wald( noise_model: str = "nb", size_factors: np.ndarray = None, batch_size: int = None, + backend: str = "numpy", training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO", **kwargs ): @@ -1669,6 +1727,12 @@ def wald( - 'nb': default :param batch_size: The batch size to use for the estimator. + :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers / + training strategies. Available are: + + - "numpy" numpy + - "tf1" tensorflow1.* >= 1.13 + - "tf2" tensorflow2.* :param training_strategy: {str, function, list} training strategy to use.
Can be: - str: will use Estimator.TrainingStrategy[training_strategy] to train @@ -1694,6 +1758,7 @@ def wald( noise_model=noise_model, size_factors=size_factors[idx] if size_factors is not None else None, batch_size=batch_size, + backend=backend, training_strategy=training_strategy, **kwargs )) @@ -1705,24 +1770,26 @@ def wald( def continuous_1d( - data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix], + data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix, glm.typing.InputDataBase], continuous: str, - df: int = 5, - factor_loc_totest: Union[str, List[str]] = None, - formula_loc: str = None, + factor_loc_totest: Union[str, List[str]], + formula_loc: str, formula_scale: str = "~1", + df: int = 5, + spline_basis: str = "bs", as_numeric: Union[List[str], Tuple[str], str] = (), test: str = 'wald', init_a: Union[np.ndarray, str] = "standard", init_b: Union[np.ndarray, str] = "standard", gene_names: Union[np.ndarray, list] = None, - sample_description=None, + sample_description: Union[None, pd.DataFrame] = None, constraints_loc: Union[dict, None] = None, constraints_scale: Union[dict, None] = None, noise_model: str = 'nb', - size_factors: np.ndarray = None, + size_factors: Union[np.ndarray, pd.core.series.Series, str] = None, batch_size: int = None, - training_strategy: Union[str, List[Dict[str, object]], Callable] = "DEFAULT", + backend: str = "numpy", + training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO", quick_scale: bool = None, dtype="float64", **kwargs @@ -1754,7 +1821,14 @@ def continuous_1d( :param df: int Degrees of freedom of the spline model, i.e. the number of spline basis vectors. df is equal to the number of coefficients in the GLM which are used to describe the - continuous depedency- + continuous dependency. + :param spline_basis: str + The type of spline basis to use, refer also to: + https://patsy.readthedocs.io/en/latest/spline-regression.html + + - "bs": B-splines + - "cr": natural cubic splines + - "cc": natural cyclic splines :param factor_loc_totest: List of factors of formula to test with Wald test. E.g. "condition" or ["batch", "condition"] if formula_loc would be "~ 1 + batch + condition" @@ -1852,6 +1926,12 @@ def continuous_1d( :param size_factors: 1D array of transformed library size factors for each cell in the same order as in data :param batch_size: the batch size to use for the estimator + :param backend: Which linear algebra library to choose. This impacts the available noise models and optimizers / + training strategies. Available are: + + - "numpy" numpy + - "tf1" tensorflow1.* >= 1.13 + - "tf2" tensorflow2.* :param training_strategy: {str, function, list} training strategy to use.
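The backend argument threaded through the signatures above selects the linear algebra engine used for fitting. A minimal usage sketch follows (illustrative only, not part of the patch: an AnnData object adata with a categorical "condition" column in adata.obs is assumed, and which backends are actually importable depends on the installed batchglm version):

import diffxpy.api as de

# Wald test between two groups, fitted with the numpy backend;
# "tf1" / "tf2" would select the TensorFlow 1.x / 2.x estimators instead.
det = de.test.two_sample(
    data=adata,
    grouping="condition",
    test="wald",
    noise_model="nb",
    backend="numpy"
)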
Can be: - str: will use Estimator.TrainingStrategy[training_strategy] to train @@ -1880,10 +1960,7 @@ """ if formula_loc is None: raise ValueError("supply formula_loc") - # Set testing default to continuous covariate if not supplied: - if factor_loc_totest is None: - factor_loc_totest = [continuous] - elif isinstance(factor_loc_totest, str): + if isinstance(factor_loc_totest, str): factor_loc_totest = [factor_loc_totest] elif isinstance(factor_loc_totest, tuple): factor_loc_totest = list(factor_loc_totest) @@ -1896,39 +1973,75 @@ def continuous_1d( gene_names = parse_gene_names(data, gene_names) sample_description = parse_sample_description(data, sample_description) - # Check that continuous factor is contained in sample description + # Check that continuous factor is contained in sample description and is numeric. if continuous not in sample_description.columns: raise ValueError('parameter continuous not found in sample_description') - + if not np.issubdtype(sample_description[continuous].dtype, np.number): + raise ValueError( + "The column corresponding to the continuous covariate ('%s') in " % continuous + + "sample description should be numeric but is '%s'" % sample_description[continuous].dtype + ) + # Check that not too many degrees of freedom given the sampled points were chosen: + if len(np.unique(sample_description[continuous].values)) <= df - 1: + raise ValueError( + "Use at most (number of observed points in continuous covariate) - 1 degrees of freedom " + + "for spline basis. You chose df=%i for n=%i points." % + (df, len(np.unique(sample_description[continuous].values))) + ) # Perform spline basis transform. - spline_basis = patsy.highlevel.dmatrix("0+bs(" + continuous + ", df=" + str(df) + ")", sample_description) + if spline_basis.lower() == "bs": + spline_basis = patsy.highlevel.dmatrix( + "bs(" + continuous + ", df=" + str(df) + ", degree=3, include_intercept=False) - 1", + sample_description + ) + elif spline_basis.lower() == "cr": + spline_basis = patsy.highlevel.dmatrix( + "cr(" + continuous + ", df=" + str(df) + ", constraints='center') - 1", + sample_description + ) + elif spline_basis.lower() == "cc": + spline_basis = patsy.highlevel.dmatrix( + "cc(" + continuous + ", df=" + str(df) + ", constraints='center') - 1", + sample_description + ) + else: + raise ValueError("spline basis %s not recognized" % spline_basis) spline_basis = pd.DataFrame(spline_basis) new_coefs = [continuous + str(i) for i in range(spline_basis.shape[1])] spline_basis.columns = new_coefs formula_extension = '+'.join(new_coefs) + # Generate interpolated spline bases. + # Save interpolated interval in last column, need to extract it later. + interpolated_interval = np.linspace( + np.min(sample_description[continuous].values), + np.max(sample_description[continuous].values), + 100 + ) + interpolated_spline_basis = np.hstack([ + np.ones([100, 1]), + patsy.highlevel.dmatrix( + "0+bs(" + continuous + ", df=" + str(df) + ")", + pd.DataFrame({continuous: interpolated_interval}) + ).base, + np.expand_dims(interpolated_interval, axis=1) + ]) # Replace continuous factor in formulas by spline basis coefficients. # Note that the brackets around formula_term_continuous propagate the sum # across interaction terms.
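# Illustrative example (editor's sketch, not part of the patch), assuming
# continuous="pseudotime" and a df=3 basis so that
# new_coefs == ["pseudotime0", "pseudotime1", "pseudotime2"]:
# the split/join below rewrites
#     formula_loc     = "~ 1 + pseudotime + batch"
# into
#     formula_loc_new = "~ 1 + (pseudotime0+pseudotime1+pseudotime2) + batch",
# and an interaction term "pseudotime:condition" accordingly becomes
#     "(pseudotime0+pseudotime1+pseudotime2):condition".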
formula_term_continuous = '(' + formula_extension + ')' - if formula_loc is not None: - formula_loc_new = formula_loc.split(continuous) - formula_loc_new = formula_term_continuous.join(formula_loc_new) - else: - formula_loc_new = None + formula_loc_new = formula_loc.split(continuous) + formula_loc_new = formula_term_continuous.join(formula_loc_new) - if formula_scale is not None: - formula_scale_new = formula_scale.split(continuous) - formula_scale_new = formula_term_continuous.join(formula_scale_new) - else: - formula_scale_new = None + formula_scale_new = formula_scale.split(continuous) + formula_scale_new = formula_term_continuous.join(formula_scale_new) # Add spline basis into sample description for x in spline_basis.columns: sample_description[x] = spline_basis[x].values - # Add spline basis to continuous covariate list + # Add spline basis to continuous covariates list as_numeric.extend(new_coefs) if test.lower() == 'wald': @@ -1938,23 +2051,34 @@ def continuous_1d( # Adjust factors / coefficients to test: # Note that the continuous covariate does not necessarily have to be tested, # it could also be a condition effect or similar. - # TODO handle interactions if continuous in factor_loc_totest: # Create reduced set of factors to test which does not contain continuous: - factor_loc_totest_new = [x for x in factor_loc_totest if x != continuous] + factor_loc_totest_intermediate = [x for x in factor_loc_totest if x != continuous] # Add spline basis terms in instead of continuous term: - factor_loc_totest_new.extend(new_coefs) + factor_loc_totest_intermediate.extend(new_coefs) else: - factor_loc_totest_new = factor_loc_totest + factor_loc_totest_intermediate = factor_loc_totest + # Replace continuous factor in interaction terms with new spline factors. 
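# Illustrative example (editor's sketch, not part of the patch): continuing
# the assumption above (continuous="pseudotime", df=3), the loop below expands
# a tested interaction factor "pseudotime:condition" into
# ["pseudotime0:condition", "pseudotime1:condition", "pseudotime2:condition"],
# while factors without an interaction, e.g. "condition", pass through unchanged.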
+ factor_loc_totest_final = [] + for i, x in enumerate(factor_loc_totest_intermediate): + if len(x.split(":")) > 1: + if np.any([x == continuous for x in x.split(":")]): + interaction_partner = [y for y in x.split(":") if y != continuous][0] + for y in new_coefs: + factor_loc_totest_final.append(y+":"+interaction_partner) + else: + factor_loc_totest_final.append(x) + else: + factor_loc_totest_final.append(x) logging.getLogger("diffxpy").debug("model formulas assembled in de.test.continuos():") - logging.getLogger("diffxpy").debug("factor_loc_totest_new: " + ",".join(factor_loc_totest_new)) + logging.getLogger("diffxpy").debug("factor_loc_totest_final: " + ",".join(factor_loc_totest_final)) logging.getLogger("diffxpy").debug("formula_loc_new: " + formula_loc_new) logging.getLogger("diffxpy").debug("formula_scale_new: " + formula_scale_new) de_test = wald( data=data, - factor_loc_totest=factor_loc_totest_new, + factor_loc_totest=factor_loc_totest_final, coef_to_test=None, formula_loc=formula_loc_new, formula_scale=formula_scale_new, @@ -1968,6 +2092,7 @@ def continuous_1d( noise_model=noise_model, size_factors=size_factors, batch_size=batch_size, + backend=backend, training_strategy=training_strategy, quick_scale=quick_scale, dtype=dtype, @@ -1978,7 +2103,8 @@ def continuous_1d( noise_model=noise_model, size_factors=size_factors, continuous_coords=sample_description[continuous].values, - spline_coefs=new_coefs + spline_coefs=new_coefs, + interpolated_spline_basis=interpolated_spline_basis ) elif test.lower() == 'lrt': if noise_model is None: @@ -2019,6 +2145,7 @@ def continuous_1d( noise_model=noise_model, size_factors=size_factors, batch_size=batch_size, + backend=backend, training_strategy=training_strategy, quick_scale=quick_scale, dtype=dtype, @@ -2028,7 +2155,8 @@ def continuous_1d( de_test=de_test, size_factors=size_factors, continuous_coords=sample_description[continuous].values, - spline_coefs=new_coefs + spline_coefs=new_coefs, + noise_model=noise_model ) else: raise ValueError('continuous(): Parameter `test` not recognized.') diff --git a/diffxpy/testing/utils.py b/diffxpy/testing/utils.py index fdefa7e..295f386 100644 --- a/diffxpy/testing/utils.py +++ b/diffxpy/testing/utils.py @@ -52,19 +52,20 @@ def parse_sample_description( "with corresponding sample annotations" ) - if anndata is not None and isinstance(data, Raw): - # Raw does not have attribute shape. - assert data.X.shape[0] == sample_description.shape[0], \ - "data matrix and sample description must contain same number of cells: %i, %i" % \ - (data.X.shape[0], sample_description.shape[0]) - elif isinstance(data, glm.typing.InputDataBase): - assert data.x.shape[0] == sample_description.shape[0], \ - "data matrix and sample description must contain same number of cells: %i, %i" % \ - (data.x.shape[0], sample_description.shape[0]) - else: - assert data.shape[0] == sample_description.shape[0], \ - "data matrix and sample description must contain same number of cells: %i, %i" % \ - (data.shape[0], sample_description.shape[0]) + if sample_description is not None: + if anndata is not None and isinstance(data, Raw): + # Raw does not have attribute shape. 
+ assert data.X.shape[0] == sample_description.shape[0], \ + "data matrix and sample description must contain same number of cells: %i, %i" % \ + (data.X.shape[0], sample_description.shape[0]) + elif isinstance(data, glm.typing.InputDataBase): + assert data.x.shape[0] == sample_description.shape[0], \ + "data matrix and sample description must contain same number of cells: %i, %i" % \ + (data.x.shape[0], sample_description.shape[0]) + else: + assert data.shape[0] == sample_description.shape[0], \ + "data matrix and sample description must contain same number of cells: %i, %i" % \ + (data.shape[0], sample_description.shape[0]) return sample_description @@ -89,7 +90,14 @@ def parse_size_factors( elif isinstance(size_factors, str): assert size_factors in sample_description.columns, "" size_factors = sample_description[size_factors].values - assert size_factors.shape[0] == data.shape[0], "data matrix and size factors must contain same number of cells" + + if anndata is not None and isinstance(data, Raw): + data_shape = data.X.shape + elif isinstance(data, glm.typing.InputDataBase): + data_shape = data.x.shape + else: + data_shape = data.shape + assert size_factors.shape[0] == data_shape[0], "data matrix and size factors must contain same number of cells" assert np.all(size_factors > 0), "size_factors <= 0 found, please remove these cells" return size_factors @@ -117,8 +125,7 @@ def dmat_unique(dmat, sample_description): def design_matrix( - data: Union[anndata.AnnData, Raw, np.ndarray, - scipy.sparse.csr_matrix] = None, + data: Union[anndata.AnnData, Raw, np.ndarray, scipy.sparse.csr_matrix] = None, sample_description: Union[None, pd.DataFrame] = None, formula: Union[None, str] = None, as_numeric: Union[List[str], Tuple[str], str] = (), @@ -206,7 +213,7 @@ def preview_coef_names( def constraint_system_from_star( - dmat: Union[None, np.ndarray] = None, + dmat: Union[None, patsy.design_info.DesignMatrix] = None, sample_description: Union[None, pd.DataFrame] = None, formula: Union[None, str] = None, as_numeric: Union[List[str], Tuple[str], str] = (), diff --git a/diffxpy/unit_test/test_constrained.py b/diffxpy/unit_test/test_constrained.py index 78b4851..545bead 100644 --- a/diffxpy/unit_test/test_constrained.py +++ b/diffxpy/unit_test/test_constrained.py @@ -5,7 +5,7 @@ import pandas as pd import scipy.stats as stats -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de @@ -21,6 +21,7 @@ def test_forfatal_from_string(self): logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) + np.random.seed(1) n_cells = 2000 n_genes = 2 @@ -39,21 +40,23 @@ def test_forfatal_from_string(self): coefficient_names = ['intercept', 'bio1', 'bio2', 'bio3', 'bio4', 'treatment1'] dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names) - dmat_est_loc = de.utils.design_matrix(dmat=dmat_est) - dmat_est_scale = de.utils.design_matrix(dmat=dmat_est) + dmat_est_loc, _ = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe") + dmat_est_scale, _ = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe") # Build constraints: constraints_loc = de.utils.constraint_matrix_from_string( - dmat=dmat_est_loc, + dmat=dmat_est_loc.values, + coef_names=dmat_est_loc.columns, constraints=["bio1+bio2=0", "bio3+bio4=0"] ) constraints_scale = de.utils.constraint_matrix_from_string( - dmat=dmat_est_scale, + dmat=dmat_est_scale.values, + coef_names=dmat_est_scale.columns, 
constraints=["bio1+bio2=0", "bio3+bio4=0"] ) test = de.test.wald( - data=sim.x, + data=sim.input_data, dmat_loc=dmat_est_loc, dmat_scale=dmat_est_scale, constraints_loc=constraints_loc, @@ -70,6 +73,7 @@ def test_forfatal_from_dict(self): logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) + np.random.seed(1) n_cells = 2000 n_genes = 2 @@ -83,26 +87,13 @@ def test_forfatal_from_dict(self): "batch": ["batch"+str(i // 500) for i in range(n_cells)] }) - # Build constraints: - dmat_loc, constraints_loc = de.utils.constraint_matrix_from_dict( - sample_description=sample_description, - formula="~1+cond+batch", - constraints={"batch": "cond"}, - dims=["design_loc_params", "loc_params"] - ) - dmat_scale, constraints_scale = de.utils.constraint_matrix_from_dict( - sample_description=sample_description, - formula="~1+cond+batch", - constraints={"batch": "cond"}, - dims=["design_scale_params", "scale_params"] - ) - test = de.test.wald( - data=sim.x, - dmat_loc=dmat_loc, - dmat_scale=dmat_scale, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, + data=sim.input_data, + sample_description=sample_description, + formula_loc="~1+cond+batch", + formula_scale="~1+cond+batch", + constraints_loc={"batch": "cond"}, + constraints_scale={"batch": "cond"}, coef_to_test=["cond[T.cond1]"] ) _ = test.summary() @@ -122,8 +113,8 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100): logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) + np.random.seed(1) n_cells = 2000 - sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) sim.generate() @@ -134,26 +125,13 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100): "batch": ["batch" + str(i // 500) for i in range(n_cells)] }) - # Build constraints: - dmat_loc, constraints_loc = de.utils.constraint_matrix_from_dict( - sample_description=sample_description, - formula="~1+cond+batch", - constraints={"batch": "cond"}, - dims=["design_loc_params", "loc_params"] - ) - dmat_scale, constraints_scale = de.utils.constraint_matrix_from_dict( - sample_description=sample_description, - formula="~1+cond+batch", - constraints={"batch": "cond"}, - dims=["design_scale_params", "scale_params"] - ) - test = de.test.wald( - data=sim.x, - dmat_loc=dmat_loc, - dmat_scale=dmat_scale, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, + data=sim.input_data, + sample_description=sample_description, + formula_loc="~1+cond+batch", + formula_scale="~1+cond+batch", + constraints_loc={"batch": "cond"}, + constraints_scale={"batch": "cond"}, coef_to_test=["cond[T.cond1]"] ) _ = test.summary() @@ -166,7 +144,7 @@ def test_null_distribution_wald_constrained(self, n_genes: int = 100): return True - def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100): + def _test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100): """ Test if de.wald() with constraints generates a uniform p-value distribution if it is given data simulated based on the null model. 
Returns the p-value @@ -181,8 +159,8 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100): logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) + np.random.seed(1) n_cells = 12000 - sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) sim.generate() @@ -213,12 +191,13 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100): 'tech1', 'tech2', 'tech3', 'tech4'] dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names) - dmat_est_loc = de.utils.design_matrix(dmat=dmat_est) - dmat_est_scale = de.utils.design_matrix(dmat=dmat_est.iloc[:, [0]]) + dmat_est_loc = de.utils.design_matrix(dmat=dmat_est, return_type="dataframe") + dmat_est_scale = de.utils.design_matrix(dmat=dmat_est.iloc[:, [0]], return_type="dataframe") # Build constraints: constraints_loc = de.utils.constraint_matrix_from_string( - dmat=dmat_est_loc, + dmat=dmat_est_loc.values, + coef_names=dmat_est_loc.columns, constraints=["bio1+bio2=0", "bio3+bio4=0", "bio5+bio6=0", @@ -229,92 +208,20 @@ def test_null_distribution_wald_constrained_2layer(self, n_genes: int = 100): constraints_scale = None test = de.test.wald( - data=sim.x, + data=sim.input_data, dmat_loc=dmat_est_loc, dmat_scale=dmat_est_scale, constraints_loc=constraints_loc, constraints_scale=constraints_scale, coef_to_test=["treatment1"] ) - summary = test.summary() - - # Compare p-value distribution under null model against uniform distribution. - pval_h0 = stats.kstest(test.pval, 'uniform').pvalue - - logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) - assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!" - - return True - - def test_null_distribution_wald_multi_constrained_2layer(self, n_genes: int = 50): - """ - Test if de.wald() for multiple coefficients with constraints - generates a uniform p-value distribution - if it is given data simulated based on the null model. Returns the p-value - of the two-side Kolmgorov-Smirnov test for equality of the observed - p-value distribution and a uniform distribution. - - n_cells is constant as the design matrix and constraints depend on it. - - :param n_genes: Number of genes to simulate (number of tests). 
- """ - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("diffxpy").setLevel(logging.WARNING) - - n_cells = 3000 - - sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) - sim.generate() - - # Build design matrix: - dmat = np.zeros([n_cells, 9]) - dmat[:, 0] = 1 - dmat[:500, 1] = 1 # bio rep 1 - dmat[500:1000, 2] = 1 # bio rep 2 - dmat[1000:1500, 3] = 1 # bio rep 3 - dmat[1500:2000, 4] = 1 # bio rep 4 - dmat[2000:2500, 5] = 1 # bio rep 5 - dmat[2500:3000, 6] = 1 # bio rep 6 - dmat[1000:2000, 7] = 1 # condition effect 1 - dmat[2000:3000, 8] = 1 # condition effect 2 - coefficient_names = ['intercept', 'bio1', 'bio2', 'bio3', 'bio4', - 'bio5', 'bio6', 'treatment1', 'treatment2'] - dmat_est = pd.DataFrame(data=dmat, columns=coefficient_names) - - dmat_est_loc = de.utils.design_matrix(dmat=dmat_est) - dmat_est_scale = de.utils.design_matrix(dmat=dmat_est) - - # Build constraints: - constraints_loc = de.utils.constraint_matrix_from_string( - dmat=dmat_est_loc, - constraints=["bio1+bio2=0", - "bio3+bio4=0", - "bio5+bio6=0"] - ) - constraints_scale = de.utils.constraint_matrix_from_string( - dmat=dmat_est_scale, - constraints=["bio1+bio2=0", - "bio3+bio4=0", - "bio5+bio6=0"] - ) - - test = de.test.wald( - data=sim.x, - dmat_loc=dmat_est_loc, - dmat_scale=dmat_est_scale, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - coef_to_test=["treatment1", "treatment2"] - ) _ = test.summary() # Compare p-value distribution under null model against uniform distribution. pval_h0 = stats.kstest(test.pval, 'uniform').pvalue logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) - assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % pval_h0 + assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!" return True diff --git a/diffxpy/unit_test/test_continuous.py b/diffxpy/unit_test/test_continuous.py deleted file mode 100644 index 57ad345..0000000 --- a/diffxpy/unit_test/test_continuous.py +++ /dev/null @@ -1,166 +0,0 @@ -import unittest - -import numpy as np -import pandas as pd -import scipy.stats as stats -import logging - -from batchglm.api.models.glm_nb import Simulator -import diffxpy.api as de - - -class TestContinuous(unittest.TestCase): - - def test_forfatal_functions(self): - """ - Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine. - - :param n_cells: Number of cells to simulate (number of observations per test). - :param n_genes: Number of genes to simulate (number of tests). 
- """ - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("diffxpy").setLevel(logging.WARNING) - - num_observations = 10 - num_features = 2 - - sim = Simulator(num_observations=num_observations, num_features=num_features) - sim.generate_sample_description(num_batches=0, num_conditions=0) - sim.generate() - - random_sample_description = pd.DataFrame({ - "pseudotime": np.random.random(size=sim.nobs), - "batch": np.random.randint(2, size=sim.nobs) - }) - - test = de.test.continuous_1d( - data=sim.X, - continuous="pseudotime", - df=3, - formula_loc="~ 1 + pseudotime + batch", - formula_scale="~ 1", - factor_loc_totest="pseudotime", - test="wald", - sample_description=random_sample_description, - quick_scale=True, - batch_size=None, - training_strategy="DEFAULT", - dtype="float64" - ) - - summary = test.summary() - ids = test.gene_ids - - # 1. Test all additional functions which depend on model computation: - # 1.1. Only continuous model: - temp = test.log_fold_change(genes=ids, nonnumeric=False) - temp = test.max(genes=ids, nonnumeric=False) - temp = test.min(genes=ids, nonnumeric=False) - temp = test.argmax(genes=ids, nonnumeric=False) - temp = test.argmin(genes=ids, nonnumeric=False) - temp = test.summary(nonnumeric=False) - # 1.2. Full model: - temp = test.log_fold_change(genes=ids, nonnumeric=True) - temp = test.max(genes=ids, nonnumeric=True) - temp = test.min(genes=ids, nonnumeric=True) - temp = test.argmax(genes=ids, nonnumeric=True) - temp = test.argmin(genes=ids, nonnumeric=True) - temp = test.summary(nonnumeric=True) - - return True - - def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100): - """ - Test if de.test.continuous() generates a uniform p-value distribution in the wald test - if it is given data simulated based on the null model. Returns the p-value - of the two-side Kolmgorov-Smirnov test for equality of the observed - p-value distriubution and a uniform distribution. - - :param n_cells: Number of cells to simulate (number of observations per test). - :param n_genes: Number of genes to simulate (number of tests). - """ - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("diffxpy").setLevel(logging.WARNING) - - sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) - sim.generate() - - random_sample_description = pd.DataFrame({ - "pseudotime": np.random.random(size=sim.nobs) - }) - - test = de.test.continuous_1d( - data=sim.X, - continuous="pseudotime", - df=3, - formula_loc="~ 1 + pseudotime", - formula_scale="~ 1", - factor_loc_totest="pseudotime", - test="wald", - sample_description=random_sample_description, - quick_scale=True, - batch_size=None, - training_strategy="DEFAULT", - dtype="float64" - ) - summary = test.summary() - - # Compare p-value distribution under null model against uniform distribution. - pval_h0 = stats.kstest(test.pval, 'uniform').pvalue - - logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) - assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!" - - return True - - def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100): - """ - Test if de.test.continuous() generates a uniform p-value distribution in lrt - if it is given data simulated based on the null model. 
Returns the p-value - of the two-side Kolmgorov-Smirnov test for equality of the observed - p-value distriubution and a uniform distribution. - - :param n_cells: Number of cells to simulate (number of observations per test). - :param n_genes: Number of genes to simulate (number of tests). - """ - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("diffxpy").setLevel(logging.WARNING) - - sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) - sim.generate() - - random_sample_description = pd.DataFrame({ - "pseudotime": np.random.random(size=sim.nobs) - }) - - test = de.test.continuous_1d( - data=sim.X, - continuous="pseudotime", - df=3, - formula_loc="~ 1 + pseudotime", - formula_scale="~ 1", - factor_loc_totest="pseudotime", - test="lrt", - sample_description=random_sample_description, - quick_scale=False, - batch_size=None, - training_strategy="DEFAULT", - dtype="float64" - ) - summary = test.summary() - - # Compare p-value distribution under null model against uniform distribution. - pval_h0 = stats.kstest(test.pval, 'uniform').pvalue - - logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) - assert pval_h0 > 0.05, "KS-Test failed: pval_h0 is <= 0.05!" - - return True - -if __name__ == '__main__': - unittest.main() diff --git a/diffxpy/unit_test/test_continuous_de.py b/diffxpy/unit_test/test_continuous_de.py new file mode 100644 index 0000000..7a26654 --- /dev/null +++ b/diffxpy/unit_test/test_continuous_de.py @@ -0,0 +1,140 @@ +import unittest +import logging +import numpy as np + +import diffxpy.api as de + + +class _TestContinuousDe: + noise_model: str + + def _test_wald_de( + self, + constrained: bool, + spline_basis: str, + ngenes: int + ): + if self.noise_model == "nb": + from batchglm.api.models.tf1.glm_nb import Simulator + rand_fn_loc = lambda shape: np.random.uniform(2, 5, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + elif self.noise_model == "norm": + from batchglm.api.models import Simulator + rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + else: + raise ValueError("noise model %s not recognized" % self.noise_model) + + n_timepoints = 7 + sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes) + sim.generate_sample_description( + num_batches=0, + num_conditions=n_timepoints + ) + sim.generate_params( + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale + ) + num_non_de = round(ngenes / 2) + sim.a_var[1:, :num_non_de] = 0 # Set all condition effects of non DE genes to zero. + sim.b_var[1:, :] = 0 # Use constant dispersion across all conditions. 
+ self.isDE = np.arange(ngenes) >= num_non_de + sim.generate_data() + + random_sample_description = sim.sample_description + random_sample_description["continuous"] = [int(x) for x in random_sample_description["condition"]] + random_sample_description["batch"] = [ + str(int(x)) + str(np.random.randint(0, 3)) + for x in random_sample_description["continuous"] + ] + + test = de.test.continuous_1d( + data=sim.input_data, + sample_description=random_sample_description, + gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)], + formula_loc="~ 1 + continuous + batch" if constrained else "~ 1 + continuous", + formula_scale="~ 1", + factor_loc_totest="continuous", + continuous="continuous", + constraints_loc={"batch": "continuous"} if constrained else None, + df=5, + spline_basis=spline_basis, + test="wald", + quick_scale=True, + noise_model=self.noise_model + ) + self._eval(sim=sim, test=test) + + def _eval(self, sim, test): + idx_de = np.where(self.isDE)[0] + idx_nonde = np.where(np.logical_not(self.isDE))[0] + + frac_de_of_non_de = np.mean(test.qval[idx_nonde] < 0.05) + frac_de_of_de = np.mean(test.qval[idx_de] < 0.05) + + logging.getLogger("diffxpy").info( + 'fraction of non-DE genes with q-value < 0.05: %.1f%%' % + float(100 * frac_de_of_non_de) + ) + logging.getLogger("diffxpy").info( + 'fraction of DE genes with q-value < 0.05: %.1f%%' % + float(100 * frac_de_of_de) + ) + assert frac_de_of_non_de <= 0.1, "too many false-positives, FPR=%f" % frac_de_of_non_de + assert frac_de_of_de >= 0.5, "too many false-negatives, TPR=%f" % frac_de_of_de + + return sim + + def _test_wald_de_all_splines( + self, + ngenes: int, + constrained: bool + ): + for x in ["bs", "cr", "cc"]: + self._test_wald_de(ngenes=ngenes, constrained=constrained, spline_basis=x) + + +class TestContinuousDeNb(_TestContinuousDe, unittest.TestCase): + """ + Negative binomial noise model unit tests that test false positive and false negative rates. + """ + + def test_wald_de_nb(self): + """ + + :return: + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.INFO) + + self.noise_model = "nb" + np.random.seed(1) + self._test_wald_de_all_splines(ngenes=100, constrained=False) + self._test_wald_de_all_splines(ngenes=100, constrained=True) + return True + + +class TestContinuousDeNorm(_TestContinuousDe, unittest.TestCase): + """ + Normal noise model unit tests that test false positive and false negative rates.
+ """ + + def test_wald_de_norm(self): + """ + + :return: + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "norm" + np.random.seed(1) + self._test_wald_de_all_splines(ngenes=100, constrained=False) + self._test_wald_de_all_splines(ngenes=100, constrained=True) + return True + + +if __name__ == '__main__': + unittest.main() diff --git a/diffxpy/unit_test/test_continuous_null.py b/diffxpy/unit_test/test_continuous_null.py new file mode 100644 index 0000000..d84c829 --- /dev/null +++ b/diffxpy/unit_test/test_continuous_null.py @@ -0,0 +1,337 @@ +import unittest + +import numpy as np +import pandas as pd +import scipy.stats as stats +import logging + +from batchglm.api.models.tf1.glm_nb import Simulator +import diffxpy.api as de + + +class _TestContinuous: + + noise_model: str + + def _fit_continuous( + self, + sim, + sample_description, + constrained, + test, + spline_basis + ): + test = de.test.continuous_1d( + data=sim.input_data, + sample_description=sample_description, + gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)], + formula_loc="~ 1 + continuous + batch" if constrained else "~ 1 + continuous", + formula_scale="~ 1", + factor_loc_totest="continuous", + continuous="continuous", + constraints_loc={"batch": "continuous"} if constrained else None, + size_factors="size_factors", + df=3, + spline_basis=spline_basis, + test=test, + quick_scale=True, + noise_model=self.noise_model + ) + return test + + def _fit_continuous_interaction( + self, + sim, + sample_description, + constrained, + test, + spline_basis + ): + test = de.test.continuous_1d( + data=sim.input_data, + sample_description=sample_description, + gene_names=["gene" + str(i) for i in range(sim.input_data.num_features)], + formula_loc="~ 1 + continuous + condition + continuous:condition" if not constrained else \ + "~ 1 + continuous + condition + continuous:condition + batch", + formula_scale="~ 1", + factor_loc_totest=["continuous", "continuous:condition"], + continuous="continuous", + constraints_loc={"batch": "condition"} if constrained else None, + size_factors="size_factors", + df=3, + spline_basis=spline_basis, + test=test, + quick_scale=True, + noise_model=self.noise_model + ) + return test + + def _test_basic( + self, + ngenes: int, + test: str, + constrained: bool, + spline_basis: str + ): + n_timepoints = 5 + sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes) + sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_params() + sim.generate_data() + + random_sample_description = pd.DataFrame({ + "continuous": np.asarray(np.random.randint(0, n_timepoints, size=sim.nobs), dtype=float) + }) + random_sample_description["batch"] = [str(int(x)) + str(np.random.randint(0, 3)) + for x in random_sample_description["continuous"]] + random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs) # TODO put into simulation. 
+ det = self._fit_continuous( + sim=sim, + sample_description=random_sample_description, + test=test, + constrained=constrained, + spline_basis=spline_basis, + ) + return det + + def _test_interaction( + self, + ngenes: int, + test: str, + constrained: bool, + spline_basis: str + ): + n_timepoints = 5 + sim = Simulator(num_observations=n_timepoints*200, num_features=ngenes) + sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_params() + sim.generate_data() + + random_sample_description = pd.DataFrame({ + "continuous": np.asarray(np.random.randint(0, n_timepoints, size=sim.nobs), dtype=float) + }) + random_sample_description["condition"] = [str(np.random.randint(0, 2)) + for x in random_sample_description["continuous"]] + random_sample_description["batch"] = [x + str(np.random.randint(0, 3)) + for x in random_sample_description["condition"]] + random_sample_description["size_factors"] = np.random.uniform(0.9, 1.1, sim.nobs) # TODO put into simulation. + det = self._fit_continuous_interaction( + sim=sim, + sample_description=random_sample_description, + test=test, + constrained=constrained, + spline_basis=spline_basis, + ) + return det + + def _test_null_model( + self, + ngenes: int, + test: str, + constrained: bool, + spline_basis: str + ): + det = self._test_basic( + ngenes=ngenes, + test=test, + constrained=constrained, + spline_basis=spline_basis + ) + return self._eval(det=det) + + def _test_null_model_interaction( + self, + ngenes: int, + test: str, + constrained: bool, + spline_basis: str + ): + det = self._test_interaction( + ngenes=ngenes, + test=test, + constrained=constrained, + spline_basis=spline_basis + ) + return self._eval(det=det) + + def _eval(self, det): + pval_h0 = stats.kstest(det.pval, 'uniform').pvalue + logging.getLogger("diffxpy").info( + 'KS-test pvalue for null model match of wald(): %f' % pval_h0 + ) + assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05" % pval_h0 + return True + + def _test_forfatal( + self, + test: str, + constrained: bool, + spline_basis: str + ): + """ + Test if de.test.continuous() DifferentialExpressionTestSingle object functions work fine. + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + test = self._test_basic( + ngenes=2, + test=test, + constrained=constrained, + spline_basis=spline_basis + ) + ids = test.gene_ids + + # 1. Test all additional functions which depend on model computation: + # 1.1. Only continuous model: + _ = test.log_fold_change(genes=ids, non_numeric=False) + _ = test.max(genes=ids, non_numeric=False) + _ = test.min(genes=ids, non_numeric=False) + _ = test.argmax(genes=ids, non_numeric=False) + _ = test.argmin(genes=ids, non_numeric=False) + _ = test.summary(non_numeric=False) + # 1.2. 
Full model: + _ = test.log_fold_change(genes=ids, non_numeric=True) + _ = test.max(genes=ids, non_numeric=True) + _ = test.min(genes=ids, non_numeric=True) + _ = test.argmax(genes=ids, non_numeric=True) + _ = test.argmin(genes=ids, non_numeric=True) + _ = test.summary(non_numeric=True) + return True + + def _test_for_fatal_all_splines( + self, + test: str, + constrained: bool + ): + for x in ["bs", "cr", "cc"]: + self._test_forfatal(test=test, constrained=constrained, spline_basis=x) + + def _test_null_model_all_splines( + self, + ngenes: int, + test: str, + constrained: bool + ): + for x in ["bs", "cr", "cc"]: + self._test_null_model(ngenes=ngenes, test=test, constrained=constrained, spline_basis=x) + + def _test_null_model_all_splines_interaction( + self, + ngenes: int, + test: str, + constrained: bool + ): + for x in ["bs", "cr", "cc"]: + self._test_null_model_interaction(ngenes=ngenes, test=test, constrained=constrained, spline_basis=x) + + +class TestContinuousNb(_TestContinuous, unittest.TestCase): + + def test_forfatal_wald(self): + """ + Test if the de.test.continuous_1d() DifferentialExpressionTestSingle object functions + work fine with the wald test for all spline bases. + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "nb" + np.random.seed(1) + self._test_for_fatal_all_splines(test="wald", constrained=False) + self._test_for_fatal_all_splines(test="wald", constrained=True) + return True + + def _test_forfatal_lrt(self): + """ + Test if the de.test.continuous_1d() DifferentialExpressionTestSingle object functions + work fine with the lrt. + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "nb" + np.random.seed(1) + _ = self._test_forfatal(test="lrt", constrained=False) + return True + + def test_null_distribution_wald_unconstrained(self): + """ + Test if de.test.continuous() generates a uniform p-value distribution in the wald test + if it is given data simulated based on the null model. Returns the p-value + of the two-side Kolmogorov-Smirnov test for equality of the observed + p-value distribution and a uniform distribution. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests).
+ """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "nb" + np.random.seed(1) + self._test_null_model_all_splines(ngenes=100, test="wald", constrained=False) + self._test_null_model_all_splines_interaction(ngenes=100, test="wald", constrained=False) + return True + + def test_null_distribution_wald_constrained(self): + """ + Test if de.test.continuous() generates a uniform p-value distribution in the wald test + if it is given data simulated based on the null model. Returns the p-value + of the two-side Kolmgorov-Smirnov test for equality of the observed + p-value distriubution and a uniform distribution. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "nb" + np.random.seed(1) + self._test_null_model_all_splines(ngenes=100, test="wald", constrained=True) + self._test_null_model_all_splines_interaction(ngenes=100, test="wald", constrained=True) + return True + + def _test_null_distribution_lrt(self): + """ + Test if de.test.continuous() generates a uniform p-value distribution in lrt + if it is given data simulated based on the null model. Returns the p-value + of the two-side Kolmgorov-Smirnov test for equality of the observed + p-value distriubution and a uniform distribution. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). + """ + logging.getLogger("tensorflow").setLevel(logging.INFO) + logging.getLogger("batchglm").setLevel(logging.INFO) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.noise_model = "nb" + np.random.seed(1) + test = self._test_null_model(nobs=2000, ngenes=100, test="lrt", constrained=False) + + # Compare p-value distribution under null model against uniform distribution. 
+ pval_h0 = stats.kstest(test.pval, 'uniform').pvalue + + logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) + return True + + +if __name__ == '__main__': + unittest.main() diff --git a/diffxpy/unit_test/test_correction.py b/diffxpy/unit_test/test_correction.py index cbd602e..9a99f6e 100644 --- a/diffxpy/unit_test/test_correction.py +++ b/diffxpy/unit_test/test_correction.py @@ -1,12 +1,4 @@ import unittest -import numpy as np -import pandas as pd -import scipy.stats as stats -import logging - -from batchglm.api.models.glm_nb import Simulator, Estimator, InputDataGLM -import diffxpy.api as de - class TestCorrection(unittest.TestCase): diff --git a/diffxpy/unit_test/test_data_types.py b/diffxpy/unit_test/test_data_types.py index f4874bb..b1f911f 100644 --- a/diffxpy/unit_test/test_data_types.py +++ b/diffxpy/unit_test/test_data_types.py @@ -6,7 +6,7 @@ import scipy.sparse import anndata -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_enrich.py b/diffxpy/unit_test/test_enrich.py index 8ed229f..a88e09f 100644 --- a/diffxpy/unit_test/test_enrich.py +++ b/diffxpy/unit_test/test_enrich.py @@ -1,10 +1,7 @@ import unittest import logging -import numpy as np -import pandas as pd -import scipy.stats as stats -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_extreme_values.py b/diffxpy/unit_test/test_extreme_values.py index c430784..69498da 100644 --- a/diffxpy/unit_test/test_extreme_values.py +++ b/diffxpy/unit_test/test_extreme_values.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_fit.py b/diffxpy/unit_test/test_fit.py index 80b177f..27cb421 100644 --- a/diffxpy/unit_test/test_fit.py +++ b/diffxpy/unit_test/test_fit.py @@ -25,10 +25,10 @@ def _test_model_fit( :param noise_model: Noise model to use for data fitting. """ if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) else: raise ValueError("noise model %s not recognized" % noise_model) @@ -43,7 +43,7 @@ def _test_model_fit( "batch": np.random.randint(2, size=sim.nobs) }) - estim = de.fit.model( + _ = de.fit.model( data=sim.input_data, sample_description=random_sample_description, formula_loc="~ 1 + condition + batch", @@ -68,10 +68,10 @@ def _test_model_fit_partition( :param noise_model: Noise model to use for data fitting. 
""" if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) else: raise ValueError("noise model %s not recognized" % noise_model) @@ -114,9 +114,9 @@ def _test_residuals_fit( :param noise_model: Noise model to use for data fitting. """ if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator else: raise ValueError("noise model %s not recognized" % noise_model) diff --git a/diffxpy/unit_test/test_numeric_covar.py b/diffxpy/unit_test/test_numeric_covar.py index 2c3bedd..1fd05fd 100644 --- a/diffxpy/unit_test/test_numeric_covar.py +++ b/diffxpy/unit_test/test_numeric_covar.py @@ -3,7 +3,7 @@ import numpy as np import logging -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_pairwise.py b/diffxpy/unit_test/test_pairwise_null.py similarity index 86% rename from diffxpy/unit_test/test_pairwise.py rename to diffxpy/unit_test/test_pairwise_null.py index 7907999..02154ea 100644 --- a/diffxpy/unit_test/test_pairwise.py +++ b/diffxpy/unit_test/test_pairwise_null.py @@ -18,11 +18,11 @@ def _prepate_data( n_groups: int ): if self.noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator rand_fn_loc = lambda shape: np.random.uniform(0.1, 1, shape) rand_fn_scale = lambda shape: np.random.uniform(0.5, 1, shape) elif self.noise_model == "norm" or self.noise_model is None: - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models import Simulator rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape) rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) else: @@ -37,7 +37,7 @@ def _prepate_data( sim.generate_data() random_sample_description = pd.DataFrame({ - "condition": np.random.randint(n_groups, size=sim.nobs) + "condition": [str(x) for x in np.random.randint(n_groups, size=sim.nobs)] }) return sim, random_sample_description @@ -64,7 +64,7 @@ def _test_null_distribution_basic( n_genes=n_genes, n_groups=n_groups ) - test = de.test.pairwise( + det = de.test.pairwise( data=sim.input_data, sample_description=sample_description, grouping="condition", @@ -73,13 +73,21 @@ def _test_null_distribution_basic( quick_scale=quick_scale, noise_model=self.noise_model ) - _ = test.summary() + if not lazy: + _ = det.summary() + _ = det.pval + _ = det.qval + _ = det.log_fold_change() + # Single pair accessors: + _ = det.pval_pairs(groups0="0", groups1="1") + _ = det.qval_pairs(groups0="0", groups1="1") + _ = det.log10_pval_pairs_clean(groups0="0", groups1="1") + _ = det.log10_qval_pairs_clean(groups0="0", groups1="1") + _ = det.log_fold_change_pairs(groups0="0", groups1="1") + _ = det.summary_pairs(groups0="0", groups1="1") # Compare p-value distribution under null model against uniform distribution. 
- if lazy: - pval_h0 = stats.kstest(test.pval_pairs(groups0=0, groups1=1).flatten(), 'uniform').pvalue - else: - pval_h0 = stats.kstest(test.pval[0, 1, :].flatten(), 'uniform').pvalue + pval_h0 = stats.kstest(det.pval_pairs(groups0="0", groups1="1").flatten(), 'uniform').pvalue logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) assert pval_h0 > 0.05, "KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5) diff --git a/diffxpy/unit_test/test_partition.py b/diffxpy/unit_test/test_partition.py index 7f83374..c041a04 100644 --- a/diffxpy/unit_test/test_partition.py +++ b/diffxpy/unit_test/test_partition.py @@ -4,7 +4,7 @@ import pandas as pd import scipy.stats as stats -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_single_de.py b/diffxpy/unit_test/test_single_de.py index 8edf03f..6d402f5 100644 --- a/diffxpy/unit_test/test_single_de.py +++ b/diffxpy/unit_test/test_single_de.py @@ -20,11 +20,11 @@ def _prepare_data( :param noise_model: Noise model to use for data fitting. """ if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator rand_fn_loc = lambda shape: np.random.uniform(5, 10, shape) rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models import Simulator rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape) rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) else: @@ -58,8 +58,8 @@ def _eval(self, sim, test): 'fraction of DE genes with q-value < 0.05: %.1f%%' % float(100 * frac_de_of_de) ) - assert frac_de_of_non_de <= 0.1, "too many false-positives" - assert frac_de_of_de >= 0.5, "too many false-negatives" + assert frac_de_of_non_de <= 0.1, "too many false-positives %f" % frac_de_of_non_de + assert frac_de_of_de >= 0.5, "too many false-negatives %f" % frac_de_of_de return sim diff --git a/diffxpy/unit_test/test_single_external_libs.py b/diffxpy/unit_test/test_single_external_libs.py index b1d4883..f6832ec 100644 --- a/diffxpy/unit_test/test_single_external_libs.py +++ b/diffxpy/unit_test/test_single_external_libs.py @@ -3,7 +3,7 @@ import numpy as np import scipy.stats as stats -from batchglm.api.models.glm_nb import Simulator +from batchglm.api.models.tf1.glm_nb import Simulator import diffxpy.api as de diff --git a/diffxpy/unit_test/test_single_fullrank.py b/diffxpy/unit_test/test_single_fullrank.py new file mode 100644 index 0000000..cf99518 --- /dev/null +++ b/diffxpy/unit_test/test_single_fullrank.py @@ -0,0 +1,84 @@ +import unittest +import logging +import numpy as np +import pandas as pd + +import diffxpy.api as de + + +class _TestSingleFullRank(unittest.TestCase): + + def _test_single_full_rank(self): + """ + Test if de.wald() generates a uniform p-value distribution + if it is given data simulated based on the null model. Returns the p-value + of the two-side Kolmgorov-Smirnov test for equality of the observed + p-value distribution and a uniform distribution. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). + :param noise_model: Noise model to use for data fitting. 
+ """ + if self.noise_model == "nb": + from batchglm.api.models.tf1.glm_nb import Simulator + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + elif self.noise_model == "norm": + from batchglm.api.models import Simulator + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + else: + raise ValueError("noise model %s not recognized" % self.noise_model) + + sim = Simulator(num_observations=200, num_features=2) + sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_params(rand_fn_scale=rand_fn_scale) + sim.generate_data() + + random_sample_description = pd.DataFrame({ + "condition": [str(x) for x in np.random.randint(2, size=sim.nobs)] + }) + + try: + random_sample_description["batch"] = random_sample_description["condition"] + _ = de.test.wald( + data=sim.input_data, + sample_description=random_sample_description, + factor_loc_totest="condition", + formula_loc="~ 1 + condition + batch", + noise_model=self.noise_model + ) + except ValueError as error: + logging.getLogger("diffxpy").info(error) + else: + raise ValueError("rank error was erroneously not thrown on under-determined unconstrained system") + + try: + random_sample_description["batch"] = [ + x + str(np.random.randint(0, 2)) for x in random_sample_description["condition"].values + ] + _ = de.test.wald( + data=sim.input_data, + sample_description=random_sample_description, + factor_loc_totest="condition", + formula_loc="~ 1 + condition + batch", + constraints_loc={"batch": "condition"}, + noise_model=self.noise_model + ) + except ValueError as error: + raise ValueError("rank error was erroneously thrown on defined constrained system") + + def test_single_full_rank( + self): + """ + Test if error is thrown if input is not full rank. + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.INFO) + + np.random.seed(1) + self.noise_model = "nb" + return self._test_single_full_rank() + + +if __name__ == '__main__': + unittest.main() diff --git a/diffxpy/unit_test/test_single_null.py b/diffxpy/unit_test/test_single_null.py index 3c9834c..7a3ec56 100644 --- a/diffxpy/unit_test/test_single_null.py +++ b/diffxpy/unit_test/test_single_null.py @@ -26,10 +26,10 @@ def _test_null_distribution_wald( :param noise_model: Noise model to use for data fitting. """ if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) else: raise ValueError("noise model %s not recognized" % noise_model) @@ -49,10 +49,7 @@ def _test_null_distribution_wald( sample_description=random_sample_description, factor_loc_totest="condition", formula_loc="~ 1 + condition + batch", - batch_size=500, - noise_model=noise_model, - training_strategy="DEFAULT", - dtype="float64" + noise_model=noise_model ) _ = test.summary() @@ -81,9 +78,9 @@ def _test_null_distribution_wald_multi( :param noise_model: Noise model to use for data fitting. 
""" if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator else: raise ValueError("noise model %s not recognized" % noise_model) @@ -100,9 +97,7 @@ def _test_null_distribution_wald_multi( sample_description=random_sample_description, factor_loc_totest="condition", formula_loc="~ 1 + condition", - noise_model=noise_model, - training_strategy="DEFAULT", - dtype="float64" + noise_model=noise_model ) _ = test.summary() @@ -131,9 +126,9 @@ def _test_null_distribution_lrt( :param noise_model: Noise model to use for data fitting. """ if noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator elif noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator else: raise ValueError("noise model %s not recognized" % noise_model) @@ -152,9 +147,7 @@ def _test_null_distribution_lrt( full_formula_scale="~ 1", reduced_formula_loc="~ 1", reduced_formula_scale="~ 1", - noise_model=noise_model, - training_strategy="DEFAULT", - dtype="float64" + noise_model=noise_model ) _ = test.summary() @@ -180,7 +173,7 @@ def _test_null_distribution_ttest( :param n_cells: Number of cells to simulate (number of observations per test). :param n_genes: Number of genes to simulate (number of tests). """ - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) @@ -220,7 +213,7 @@ def _test_null_distribution_rank( :param n_cells: Number of cells to simulate (number of observations per test). :param n_genes: Number of genes to simulate (number of tests). """ - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models.tf1.glm_norm import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) diff --git a/diffxpy/unit_test/test_single_sf_null.py b/diffxpy/unit_test/test_single_sf_null.py new file mode 100644 index 0000000..625379e --- /dev/null +++ b/diffxpy/unit_test/test_single_sf_null.py @@ -0,0 +1,128 @@ +import unittest +import logging +import numpy as np +import pandas as pd +import scipy.stats as stats + +import diffxpy.api as de + + +class _TestSingleSfNull: + + def _test_null_distribution_wald( + self, + n_cells: int, + n_genes: int, + noise_model: str + ): + """ + Test if de.wald() generates a uniform p-value distribution + if it is given data simulated based on the null model. Returns the p-value + of the two-side Kolmgorov-Smirnov test for equality of the observed + p-value distribution and a uniform distribution. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). + :param noise_model: Noise model to use for data fitting. 
+ """ + if noise_model == "nb": + from batchglm.api.models.tf1.glm_nb import Simulator + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + elif noise_model == "norm": + from batchglm.api.models import Simulator + rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape) + else: + raise ValueError("noise model %s not recognized" % noise_model) + + sim = Simulator(num_observations=n_cells, num_features=n_genes) + sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_params(rand_fn_scale=rand_fn_scale) + sim.generate_data() + + random_sample_description = pd.DataFrame({ + "condition": np.random.randint(2, size=sim.nobs), + "batch": np.random.randint(2, size=sim.nobs) + }) + random_sf = np.random.uniform(0.5, 1.5, sim.nobs) + + test = de.test.wald( + data=sim.input_data, + sample_description=random_sample_description, + factor_loc_totest="condition", + formula_loc="~ 1 + condition + batch", + size_factors=random_sf, + batch_size=500, + noise_model=noise_model, + training_strategy="DEFAULT", + dtype="float64" + ) + _ = test.summary() + + # Compare p-value distribution under null model against uniform distribution. + pval_h0 = stats.kstest(test.pval, 'uniform').pvalue + + logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald(): %f' % pval_h0) + assert pval_h0 > 0.05, ("KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5)) + + return True + + +class TestSingleSfNullNb(_TestSingleSfNull, unittest.TestCase): + """ + Negative binomial noise model unit tests that test whether a test generates uniformly + distributed p-values if data are sampled from the null model. + """ + + def test_null_distribution_wald_nb( + self, + n_cells: int = 2000, + n_genes: int = 200 + ): + """ + Test if wald() generates a uniform p-value distribution for "nb" noise model. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). + """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + np.random.seed(1) + return self._test_null_distribution_wald( + n_cells=n_cells, + n_genes=n_genes, + noise_model="nb" + ) + + +class TestSingleSfNullNorm(_TestSingleSfNull, unittest.TestCase): + """ + Normal noise model unit tests that test whether a test generates uniformly + distributed p-values if data are sampled from the null model. + """ + def test_null_distribution_wald_norm( + self, + n_cells: int = 200, + n_genes: int = 200 + ): + """ + Test if wald() generates a uniform p-value distribution for "norm" noise model. + + :param n_cells: Number of cells to simulate (number of observations per test). + :param n_genes: Number of genes to simulate (number of tests). 
+ """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + np.random.seed(1) + return self._test_null_distribution_wald( + n_cells=n_cells, + n_genes=n_genes, + noise_model="norm" + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/diffxpy/unit_test/test_vsrest.py b/diffxpy/unit_test/test_vsrest.py index 9a78505..cb3b074 100644 --- a/diffxpy/unit_test/test_vsrest.py +++ b/diffxpy/unit_test/test_vsrest.py @@ -22,7 +22,7 @@ def test_null_distribution_wald(self, n_cells: int = 2000, n_genes: int = 100, n logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) @@ -65,7 +65,7 @@ def test_null_distribution_lrt(self, n_cells: int = 2000, n_genes: int = 100): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.ERROR) - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) @@ -108,7 +108,7 @@ def test_null_distribution_rank(self, n_cells: int = 2000, n_genes: int = 100, n logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) - from batchglm.api.models.glm_nb import Simulator + from batchglm.api.models.tf1.glm_nb import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) @@ -148,7 +148,7 @@ def test_null_distribution_ttest(self, n_cells: int = 2000, n_genes: int = 100, logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) - from batchglm.api.models.glm_norm import Simulator + from batchglm.api.models import Simulator sim = Simulator(num_observations=n_cells, num_features=n_genes) sim.generate_sample_description(num_batches=0, num_conditions=0) diff --git a/docs/requires.txt b/docs/requires.txt index 5e35d86..1592d45 100644 --- a/docs/requires.txt +++ b/docs/requires.txt @@ -2,7 +2,7 @@ numpy>=1.14.0 scipy pandas patsy>=0.5.0 -batchglm>=0.6.6 +batchglm>=0.7.1 statsmodels anndata seaborn diff --git a/requirements.txt b/requirements.txt index 5e35d86..1592d45 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy>=1.14.0 scipy pandas patsy>=0.5.0 -batchglm>=0.6.6 +batchglm>=0.7.1 statsmodels anndata seaborn diff --git a/setup.py b/setup.py index a932746..99ba194 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ 'scipy>=1.2.1', 'pandas', 'patsy>=0.5.0', - 'batchglm>=0.6.6', + 'batchglm>=0.7.1', 'statsmodels', ], extras_require={