diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py
index 0c594b09e8..da768ffd52 100644
--- a/pymc3/step_methods/hmc/base_hmc.py
+++ b/pymc3/step_methods/hmc/base_hmc.py
@@ -44,7 +44,7 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
         if isinstance(scaling, dict):
             scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)
 
-        n = scaling.shape[0]
+        n = model.dict_to_array(model.test_point).shape[0]
         self.step_size = step_scale / (n ** 0.25)
         self.potential = quad_potential(scaling, is_cov, as_cov=False)
 
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py
index 835e44174c..b4d62f6b24 100644
--- a/pymc3/step_methods/hmc/quadpotential.py
+++ b/pymc3/step_methods/hmc/quadpotential.py
@@ -1,15 +1,19 @@
 from numpy import dot
 from numpy.random import normal
 import scipy.linalg
+from scipy import linalg
 from theano.tensor import slinalg
+import theano.tensor as tt
+import theano
 from scipy.sparse import issparse
+import itertools
 
 from pymc3.theanof import floatX
 
 import numpy as np
 
 __all__ = ['quad_potential', 'ElemWiseQuadPotential', 'QuadPotential',
-           'QuadPotential_Inv', 'isquadpotential']
+           'QuadPotential_Inv', 'isquadpotential', 'QuadPotentialSample']
 
 
 def quad_potential(C, is_cov, as_cov):
@@ -18,7 +22,8 @@ def quad_potential(C, is_cov, as_cov):
     ----------
     C : arraylike, 0 <= ndim <= 2
         scaling matrix for the potential
-        vector treated as diagonal matrix
+        vector treated as diagonal matrix.
+        If C is already a potential, it is returned unchanged.
     is_cov : Boolean
         whether C is provided as a covariance matrix or hessian
     as_cov : Boolean
@@ -29,6 +34,8 @@ def quad_potential(C, is_cov, as_cov):
     -------
     q : Quadpotential
     """
+    if isquadpotential(C):
+        return C
 
     if issparse(C):
         if not chol_available:
@@ -135,6 +142,150 @@ def energy(self, x):
 
     __call__ = random
 
+
+def _solve_lanczos(V, alphas, betas, estimator=None):
+    e = np.zeros(len(alphas))
+    e[0] = 1
+    T = [alphas, betas]
+    # TODO Is there a way to avoid computing all eigenvectors?
+    eig, eigv = linalg.eig_banded(T, lower=True)
+    if estimator is None:
+        if np.any(eig < 0):
+            raise ValueError("Matrix is not positive definite")
+        eig = 1 / np.sqrt(eig)
+    else:
+        eig = estimator(eig)
+    f_T = eigv @ (eig * np.linalg.solve(eigv, e))
+    return np.dot(np.transpose(V), f_T)
+
+
+def _sample_mvn_lanczos(matprot, z, epsilon, maxiter=None,
+                       verbose=False, estimator=None):
+    """Sample from a multivariate normal using matrix-free lanczos.
+
+    Given a matrix :math:`A`, sample from :math:`N(0, A^{-1})`, using
+    only matrix vector products :math:`Ax`. This is done by approximating
+    :math:`A^{-1/2}z` for a :math:`z \sim N(0, I)` using a Lanczos process
+    as described in [1].
+
+    Parameters
+    ----------
+    matprot : function(x) -> Ax
+        A function that computes the matrix-vector product :math:`Ax`.
+    z : ndarray
+        A standard normal distributed vector.
+    epsilon : float
+        Stop iteration, if the relative change of the estimate is below
+        `epsilon`.
+    maxiter : int, optional
+        Maximum number of iterations.
+    estimator : function, optional
+        A function that maps the eigenvalues of the krylov subspace to
+        what is supposed to be estimated. By default this is
+        `lambda eig: 1 / np.sqrt(eig)`. If you want to sample from
+        :math:`N(0, A)` instead of from :math:`N(0, A^{-1/2})`, you can
+        set this to `lambda eig: np.sqrt(eig)`.
+
+    References
+    ----------
+    [1] Chow, E., and Y. Saad. “Preconditioned Krylov Subspace Methods for
+        Sampling Multivariate Gaussian Distributions.” SIAM Journal on
+        Scientific Computing 36, no. 2 (January 1, 2014): A588–608.
+        doi:10.1137/130920587.
+    """
+    n = len(z)
+    z_norm = np.linalg.norm(z)
+    z = z / z_norm
+    q = z.copy()
+    beta = 1
+
+    alphas = []
+    betas = []
+    V = []
+
+    for n in itertools.count():
+        if maxiter is not None and n > maxiter:
+            raise ArithmeticError("Maximum number of iterations reached.")
+        v = q / beta
+        if n > 0:
+            q = matprot(v) - beta * V[-1]
+        else:
+            q = matprot(v)
+
+        alpha = v @ q
+        q = q - alpha * v
+        beta = np.linalg.norm(q)
+
+        alphas.append(alpha)
+        betas.append(beta)
+        V.append(v)
+
+        y_new = _solve_lanczos(V, alphas, betas, estimator)
+
+        if beta == 0:
+            return z_norm * y_new
+
+        if n > 0:
+            error = np.linalg.norm(y_new - y_old) / np.linalg.norm(y_new)
+            if np.isnan(error):
+                raise ArithmeticError("Current error is nan.")
+            if verbose and n % 100 == 0:
+                print(error)
+            if error < epsilon:
+                return z_norm * y_new
+        y_old = y_new
+
+
+class QuadPotentialSample:
+    def __init__(self, samples, lam=0.01, epsilon=1e-8, maxiter=None):
+        """Use posterior samples to estimate the covariance.
+
+        Parameters
+        ----------
+        samples : ndarray
+            An array of posterior samples with shape `(s, n)`, where
+            `s` is the number of samples and `n` the number of parameters.
+        lam : float
+            Regularization parameter. Use :math:`(1 - \lambda)\Sigma +
+            \lambda D` as covariance, where `\Sigma` is the sample covariance
+            and D is the diagonal matrix containing the sample variances.
+        epsilon : float, optional
+            Error tolerance used in the convergence criterion of the
+            lanczos process.
+        maxiter : int, optional
+            The maximum number of iterations in the lanczos process.
+        """
+        self.k, self.n = samples.shape
+        self.lam = lam
+        self.samples = samples.astype(np.float64).copy()
+        self.samples[:] -= self.samples.mean(axis=0)
+        self.stds = self.samples.std(axis=0)
+        self.vars = self.stds ** 2
+        self.epsilon = epsilon
+        self.maxiter = maxiter
+
+    def velocity(self, x):
+        samples = theano.shared(floatX(np.copy(self.samples, 'C')))
+        D = theano.shared(floatX(self.vars))
+        Ax = tt.dot(samples, x)
+        Ax = tt.dot(samples.T, Ax) / (self.k - 1)
+        return floatX(1 - self.lam) * Ax + floatX(self.lam) * D * x
+
+    def matvec(self, x):
+        Ax = self.samples.T.dot(self.samples.dot(x)) / (self.k - 1)
+        return (1 - self.lam) * Ax + self.lam * x * self.vars
+
+    def energy(self, x):
+        return floatX(0.5) * x.dot(self.velocity(x))
+
+    def random(self):
+        b = normal(size=self.n)
+        # Use the diagonal matrix as preconditioner
+        matvec = lambda x: self.matvec(x / self.stds) / self.stds
+        sample = _sample_mvn_lanczos(matvec, b, self.epsilon, self.maxiter)
+        return floatX(sample / self.stds)
+
+
 try:
     import sksparse.cholmod as cholmod
     chol_available = True
diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py
index 9a7a5da052..d8e5c158db 100644
--- a/pymc3/tests/test_quadpotential.py
+++ b/pymc3/tests/test_quadpotential.py
@@ -3,6 +3,7 @@
 import scipy.sparse
 import theano.tensor as tt
 import theano
+import numpy.testing as npt
 
 from pymc3.step_methods.hmc import quadpotential
 
@@ -64,23 +65,35 @@ def test_equal_diag():
         x_ = np.random.randn(5)
         x = tt.vector()
         x.tag.test_value = x_
+
+        samples = np.random.randn(100000, 5) * np.sqrt(diag)
+
         pots = [
-            quadpotential.quad_potential(diag, False, False),
-            quadpotential.quad_potential(1. / diag, True, False),
-            quadpotential.quad_potential(np.diag(diag), False, False),
-            quadpotential.quad_potential(np.diag(1. / diag), True, False),
+            quadpotential.quad_potential(diag, True, False),
+            quadpotential.quad_potential(1. / diag, False, False),
+            quadpotential.quad_potential(np.diag(diag), True, False),
+            quadpotential.quad_potential(np.diag(1. / diag), False, False),
         ]
         if quadpotential.chol_available:
-            diag_ = scipy.sparse.csc_matrix(np.diag(1. / diag))
+            diag_ = scipy.sparse.csc_matrix(np.diag(diag))
             pots.append(quadpotential.quad_potential(diag_, True, False))
 
-        v = np.diag(1. / diag).dot(x_)
-        e = x_.dot(np.diag(1. / diag).dot(x_)) / 2
-        for pot in pots:
+        pots.append(quadpotential.QuadPotentialSample(samples, 0))
+
+        v = np.diag(diag).dot(x_)
+        e = x_.dot(np.diag(diag).dot(x_)) / 2
+        for pot in pots[:-1]:
+            v_function = theano.function([x], pot.velocity(x))
+            e_function = theano.function([x], pot.energy(x))
+            npt.assert_allclose(v_function(x_), v)
+            npt.assert_allclose(e_function(x_), e)
+
+        # QuadPotentialSample is less accurate
+        for pot in pots[-1:]:
             v_function = theano.function([x], pot.velocity(x))
             e_function = theano.function([x], pot.energy(x))
-            assert np.allclose(v_function(x_), v)
-            assert np.allclose(e_function(x_), e)
+            npt.assert_allclose(v_function(x_), v, rtol=0.2)
+            npt.assert_allclose(e_function(x_), e, rtol=0.2)
 
 
 def test_equal_dense():
@@ -94,35 +107,51 @@ def test_equal_dense():
         x_ = np.random.randn(5)
         x = tt.vector()
         x.tag.test_value = x_
+
+        samples = np.random.multivariate_normal(np.zeros_like(x_), cov, 1000)
+
         pots = [
-            quadpotential.quad_potential(cov, False, False),
-            quadpotential.quad_potential(inv, True, False),
+            quadpotential.quad_potential(cov, True, False),
+            quadpotential.quad_potential(inv, False, False),
         ]
         if quadpotential.chol_available:
-            pots.append(quadpotential.quad_potential(cov, False, False))
+            pots.append(quadpotential.quad_potential(cov, True, False))
+
+        pots.append(quadpotential.QuadPotentialSample(samples, 0))
 
-        v = np.linalg.solve(cov, x_)
+        v = np.dot(cov, x_)
         e = 0.5 * x_.dot(v)
-        for pot in pots:
+        for pot in pots[:-1]:
             v_function = theano.function([x], pot.velocity(x))
             e_function = theano.function([x], pot.energy(x))
-            assert np.allclose(v_function(x_), v)
-            assert np.allclose(e_function(x_), e)
+            npt.assert_allclose(v_function(x_), v)
+            npt.assert_allclose(e_function(x_), e)
+
+        for pot in pots[:-1]:
+            v_function = theano.function([x], pot.velocity(x))
+            e_function = theano.function([x], pot.energy(x))
+            npt.assert_allclose(v_function(x_), v, rtol=0.5)
+            npt.assert_allclose(e_function(x_), e, rtol=0.5)
 
 
 def test_random_diag():
     d = np.arange(10) + 1
     np.random.seed(42)
+
+    samples = np.random.randn(1000, 10) * np.sqrt(d)
+
     pots = [
         quadpotential.quad_potential(d, True, False),
         quadpotential.quad_potential(1./d, False, False),
         quadpotential.quad_potential(np.diag(d), True, False),
         quadpotential.quad_potential(np.diag(1./d), False, False),
+        quadpotential.QuadPotentialSample(samples, 0),
     ]
     if quadpotential.chol_available:
         d_ = scipy.sparse.csc_matrix(np.diag(d))
         pot = quadpotential.quad_potential(d, True, False)
         pots.append(pot)
+
     for pot in pots:
         vals = np.array([pot.random() for _ in range(1000)])
         assert np.allclose(vals.std(0), np.sqrt(1./d), atol=0.1)
@@ -137,13 +166,17 @@ def test_random_dense():
         inv = np.linalg.inv(cov)
         assert np.allclose(inv.dot(cov), np.eye(5))
 
+        samples = np.random.multivariate_normal(np.zeros(5), cov, 1000)
+
         pots = [
             quadpotential.QuadPotential(cov),
             quadpotential.QuadPotential_Inv(inv),
+            quadpotential.QuadPotentialSample(samples, 0),
         ]
         if quadpotential.chol_available:
             pot = quadpotential.QuadPotential_Sparse(scipy.sparse.csc_matrix(cov))
             pots.append(pot)
+
         for pot in pots:
             cov_ = np.cov(np.array([pot.random() for _ in range(1000)]).T)
             assert np.allclose(cov_, inv, atol=0.1)