diff --git a/src/BlockVector.cc b/src/BlockVector.cc
index 21e2854c..a1923581 100644
--- a/src/BlockVector.cc
+++ b/src/BlockVector.cc
@@ -478,6 +478,27 @@ void BlockVector<ScalarType, MemorySpaceType>::axpy(const double alpha,
     LinearAlgebraUtils<MemorySpaceType>::MPaxpy(
         locnumel_, alpha, bv.vect_[ix] + shift, vect_[iy] + shift);
 }
+
+template <typename ScalarType, typename MemorySpaceType>
+void BlockVector<ScalarType, MemorySpaceType>::applyDiagonalOp(
+    const std::vector<double>& diag,
+    BlockVector<ScalarType, MemorySpaceType>& dst) const
+{
+    diagop_tm_.start();
+
+    const double* const dd = diag.data();
+
+    for (unsigned int j = 0; j < vect_.size(); j++)
+    {
+        const ScalarType* __restrict__ srcj = vect_[j];
+        ScalarType* __restrict__ dstj       = dst.vect_[j];
+        for (int i = 0; i < numel_; i++)
+            dstj[i] = (ScalarType)(dd[i] * (double)srcj[i]);
+    }
+
+    diagop_tm_.stop();
+}
+
 template <typename ScalarType, typename MemorySpaceType>
 void BlockVector<ScalarType, MemorySpaceType>::hasnan(const int j) const
 {
@@ -534,6 +555,7 @@ void BlockVector<ScalarType, MemorySpaceType>::printTimers(std::ostream& os)
     scal_tm_.print(os);
     opminus_tm_.print(os);
     copy_tm_.print(os);
+    diagop_tm_.print(os);
 }
 
 template <typename ScalarType, typename MemorySpaceType>
diff --git a/src/BlockVector.h b/src/BlockVector.h
index f424d78f..33016d62 100644
--- a/src/BlockVector.h
+++ b/src/BlockVector.h
@@ -34,6 +34,7 @@ class BlockVector
     static Timer scal_tm_;
     static Timer opminus_tm_;
     static Timer copy_tm_;
+    static Timer diagop_tm_;
 
     static short n_instances_;
     static short subdivx_;
@@ -137,6 +138,9 @@ class BlockVector
         return vect_[i];
     }
 
+    void applyDiagonalOp(const std::vector<double>& diag,
+        BlockVector<ScalarType, MemorySpaceType>& dst) const;
+
     ScalarType maxAbsValue() const;
 
     template <typename ScalarType2>
@@ -331,4 +335,8 @@ Timer BlockVector<ScalarType, MemorySpaceType>::opminus_tm_(
 
 template <typename ScalarType, typename MemorySpaceType>
 Timer BlockVector<ScalarType, MemorySpaceType>::copy_tm_("BlockVector::copy");
+
+template <typename ScalarType, typename MemorySpaceType>
+Timer BlockVector<ScalarType, MemorySpaceType>::diagop_tm_(
+    "BlockVector::diagop");
 #endif
diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc
index 85f6fc9f..636de977 100644
--- a/src/DavidsonSolver.cc
+++ b/src/DavidsonSolver.cc
@@ -521,22 +521,29 @@ int DavidsonSolver<OrbitalsType, MatrixType>::solve(
 
                 kbpsi_2.computeHvnlMatrix(&kbpsi_2, ions_, h22nl);
                 kbpsi_1.computeHvnlMatrix(&kbpsi_2, ions_, h12nl);
+
+                h12 = h12nl;
+                h22 = h22nl;
             }
             else
             {
-                h11 = h11nl;
-                hamiltonian_->applyLocal(numst_, orbitals, hphi);
+                hamiltonian_->applyDeltaPot(orbitals, hphi);
                 orbitals.addDotWithNcol2Matrix(hphi, h11);
             }
 
-            // compute H*P and store in hphi
-            hamiltonian_->applyLocal(numst_, work_orbitals, hphi);
+            if (inner_it == 0)
+            {
+                // compute H*P and store in hphi
+                hamiltonian_->applyLocal(numst_, work_orbitals, hphi);
+            }
+            else
+            {
+                hamiltonian_->applyDeltaPot(work_orbitals, hphi);
+            }
 
             // update h22, h12 and h21
-            h12 = h12nl;
             orbitals.addDotWithNcol2Matrix(hphi, h12);
 
-            h22 = h22nl;
             work_orbitals.addDotWithNcol2Matrix(hphi, h22);
 
             h21.transpose(1., h12, 0.);
@@ -609,16 +616,11 @@ int DavidsonSolver<OrbitalsType, MatrixType>::solve(
                 energy_->saveVofRho();
 
                 // update h11, h22, h12, and h21
-                h11 = h11nl;
-                hamiltonian_->applyLocal(numst_, orbitals, hphi);
+                hamiltonian_->applyDeltaPot(orbitals, hphi);
                 orbitals.addDotWithNcol2Matrix(hphi, h11);
 
-                hamiltonian_->applyLocal(numst_, work_orbitals, hphi);
-
-                h22 = h22nl;
+                hamiltonian_->applyDeltaPot(work_orbitals, hphi);
                 work_orbitals.addDotWithNcol2Matrix(hphi, h22);
-
-                h12 = h12nl;
                 orbitals.addDotWithNcol2Matrix(hphi, h12);
 
                 h21.transpose(1., h12, 0.);
diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h
index 52ece157..c641459d 100644
--- a/src/ExtendedGridOrbitals.h
+++ b/src/ExtendedGridOrbitals.h
@@ -281,6 +281,12 @@ class ExtendedGridOrbitals : public Orbitals
         assert(numst_ < 10000);
         return numst_;
     }
+    void applyDiagonalOp(
+        const std::vector<POTDTYPE>& v, ExtendedGridOrbitals& hphi) const
+    {
+        block_vector_.applyDiagonalOp(v, hphi.block_vector_);
+    }
+
     short subdivx(void) const { return 1; }
     void printChromaticNumber(std::ostream& os) const
     {
diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc
index aed9dbad..601fdc24 100644
--- a/src/Hamiltonian.cc
+++ b/src/Hamiltonian.cc
@@ -161,6 +161,14 @@ void Hamiltonian<T>::applyLocal(const int ncolors, T& phi, T& hphi)
     apply_Hloc_tm_.stop();
 }
 
+template <class T>
+void Hamiltonian<T>::applyDeltaPot(const T& phi, T& hphi)
+{
+    const std::vector<POTDTYPE>& dv(pot_->dv());
+
+    phi.applyDiagonalOp(dv, hphi);
+}
+
 // add to hij the elements <phi1|Hloc|phi2>
 // corresponding to the local part of the Hamiltonian
 template <>
diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h
index 0c87ceda..7263dff6 100644
--- a/src/Hamiltonian.h
+++ b/src/Hamiltonian.h
@@ -42,6 +42,11 @@ class Hamiltonian
     const OrbitalsType& applyLocal(OrbitalsType& phi, const bool force = false);
     void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi);
 
+    /*!
+     * Apply potential difference dv to phi
+     */
+    void applyDeltaPot(const OrbitalsType& phi, OrbitalsType& hphi);
+
     template <class MatrixType>
     void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2,
         MatrixType& mat, const bool force);
diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h
index a2cd525d..ee49497c 100644
--- a/src/LocGridOrbitals.h
+++ b/src/LocGridOrbitals.h
@@ -363,6 +363,12 @@ class LocGridOrbitals : public Orbitals
                   << std::endl;
     }
 
+    void applyDiagonalOp(
+        const std::vector<POTDTYPE>& v, LocGridOrbitals& hphi) const
+    {
+        block_vector_.applyDiagonalOp(v, hphi.block_vector_);
+    }
+
     void scal(const double alpha)
     {
         block_vector_.scal(alpha);
diff --git a/src/MGmol.cc b/src/MGmol.cc
index a6af740e..cf57a746 100644
--- a/src/MGmol.cc
+++ b/src/MGmol.cc
@@ -902,8 +902,12 @@ void MGmol<OrbitalsType>::printTimers()
     proj_matrices_->printTimers(os_);
     ShortSightedInverse::printTimers(os_);
     if (std::is_same<OrbitalsType, ExtendedGridOrbitals<ORBDTYPE>>::value)
+    {
         MVPSolver<ExtendedGridOrbitals<ORBDTYPE>,
             dist_matrix::DistMatrix<DISTMATDTYPE>>::printTimers(os_);
+        MVPSolver<ExtendedGridOrbitals<ORBDTYPE>,
+            ReplicatedMatrix>::printTimers(os_);
+    }
     VariableSizeMatrixInterface::printTimers(os_);
     DataDistribution::printTimers(os_);
     PackedCommunicationBuffer::printTimers(os_);
diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc
index 3be7fa10..97bcc825 100644
--- a/src/MVPSolver.cc
+++ b/src/MVPSolver.cc
@@ -213,6 +213,8 @@ int MVPSolver<OrbitalsType, MatrixType>::solve(OrbitalsType& orbitals)
 
         OrbitalsType hphi("MVP_hphi", orbitals);
 
+        MatrixType h11(h11_nl);
+
         for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++)
         {
             if (onpe0 && ct.verbose > 1)
@@ -240,8 +242,14 @@ int MVPSolver<OrbitalsType, MatrixType>::solve(OrbitalsType& orbitals)
 
             // compute h11 for the current potential by adding local part to
             // nonlocal components
-            MatrixType h11(h11_nl);
-            hamiltonian_->applyLocal(numst_, orbitals, hphi);
+            if (inner_it == 0)
+            {
+                hamiltonian_->applyLocal(numst_, orbitals, hphi);
+            }
+            else
+            {
+                hamiltonian_->applyDeltaPot(orbitals, hphi);
+            }
             orbitals.addDotWithNcol2Matrix(hphi, h11);
 
             current_proj_mat->assignH(h11);
@@ -320,8 +328,7 @@ int MVPSolver<OrbitalsType, MatrixType>::solve(OrbitalsType& orbitals)
                     energy_->saveVofRho();
 
                     // update h11
-                    h11 = h11_nl;
-                    hamiltonian_->applyLocal(numst_, orbitals, hphi);
+                    hamiltonian_->applyDeltaPot(orbitals, hphi);
                     orbitals.addDotWithNcol2Matrix(hphi, h11);
 
                     proj_mat_work_->assignH(h11);
diff --git a/src/Potentials.cc b/src/Potentials.cc
index 4da9ab9a..2fd556f4 100644
--- a/src/Potentials.cc
+++ b/src/Potentials.cc
@@ -155,6 +155,7 @@ double Potentials::updateVtot(const std::vector<std::vector<RHODTYPE>>& rho)
     double minus = -1.;
     LinearAlgebraUtils<MemorySpace::Host>::MPaxpy(
         size_, minus, &vtot_[0], &dv_[0]);
+    LinearAlgebraUtils<MemorySpace::Host>::MPscal(size_, minus, &dv_[0]);
 
     evalNormDeltaVtotRho(rho);
 
diff --git a/src/Potentials.h b/src/Potentials.h
index c7062234..f5dc4636 100644
--- a/src/Potentials.h
+++ b/src/Potentials.h
@@ -155,6 +155,8 @@ class Potentials
     POTDTYPE* vtot() { return vtot_.data(); }
     RHODTYPE* rho_comp() { return rho_comp_.data(); }
 
+    const std::vector<POTDTYPE>& dv() { return dv_; }
+
     const std::vector<POTDTYPE>& vnuc() const { return v_nuc_; }
     const std::vector<POTDTYPE>& vh_rho() const { return vh_rho_; }