diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 21e2854c..a1923581 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -478,6 +478,27 @@ void BlockVector::axpy(const double alpha, LinearAlgebraUtils::MPaxpy( locnumel_, alpha, bv.vect_[ix] + shift, vect_[iy] + shift); } + +template +void BlockVector::applyDiagonalOp( + const std::vector& diag, + BlockVector& dst) const +{ + diagop_tm_.start(); + + const double* const dd = diag.data(); + + for (unsigned int j = 0; j < vect_.size(); j++) + { + const ScalarType* __restrict__ srcj = vect_[j]; + ScalarType* __restrict__ dstj = dst.vect_[j]; + for (int i = 0; i < numel_; i++) + dstj[i] = (ScalarType)(dd[i] * (double)srcj[i]); + } + + diagop_tm_.stop(); +} + template void BlockVector::hasnan(const int j) const { @@ -534,6 +555,7 @@ void BlockVector::printTimers(std::ostream& os) scal_tm_.print(os); opminus_tm_.print(os); copy_tm_.print(os); + diagop_tm_.print(os); } template diff --git a/src/BlockVector.h b/src/BlockVector.h index f424d78f..33016d62 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -34,6 +34,7 @@ class BlockVector static Timer scal_tm_; static Timer opminus_tm_; static Timer copy_tm_; + static Timer diagop_tm_; static short n_instances_; static short subdivx_; @@ -137,6 +138,9 @@ class BlockVector return vect_[i]; } + void applyDiagonalOp(const std::vector& diag, + BlockVector& dst) const; + ScalarType maxAbsValue() const; template @@ -331,4 +335,8 @@ Timer BlockVector::opminus_tm_( template Timer BlockVector::copy_tm_("BlockVector::copy"); + +template +Timer BlockVector::diagop_tm_( + "BlockVector::diagop"); #endif diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index 85f6fc9f..636de977 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -521,22 +521,29 @@ int DavidsonSolver::solve( kbpsi_2.computeHvnlMatrix(&kbpsi_2, ions_, h22nl); kbpsi_1.computeHvnlMatrix(&kbpsi_2, ions_, h12nl); + + h12 = h12nl; + h22 = h22nl; } else { - h11 = h11nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); } - // compute H*P and store in hphi - hamiltonian_->applyLocal(numst_, work_orbitals, hphi); + if (inner_it == 0) + { + // compute H*P and store in hphi + hamiltonian_->applyLocal(numst_, work_orbitals, hphi); + } + else + { + hamiltonian_->applyDeltaPot(work_orbitals, hphi); + } // update h22, h12 and h21 - h12 = h12nl; orbitals.addDotWithNcol2Matrix(hphi, h12); - h22 = h22nl; work_orbitals.addDotWithNcol2Matrix(hphi, h22); h21.transpose(1., h12, 0.); @@ -609,16 +616,11 @@ int DavidsonSolver::solve( energy_->saveVofRho(); // update h11, h22, h12, and h21 - h11 = h11nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); - hamiltonian_->applyLocal(numst_, work_orbitals, hphi); - - h22 = h22nl; + hamiltonian_->applyDeltaPot(work_orbitals, hphi); work_orbitals.addDotWithNcol2Matrix(hphi, h22); - - h12 = h12nl; orbitals.addDotWithNcol2Matrix(hphi, h12); h21.transpose(1., h12, 0.); diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 52ece157..c641459d 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -281,6 +281,12 @@ class ExtendedGridOrbitals : public Orbitals assert(numst_ < 10000); return numst_; } + void applyDiagonalOp( + const std::vector& v, ExtendedGridOrbitals& hphi) const + { + block_vector_.applyDiagonalOp(v, hphi.block_vector_); + } + short subdivx(void) const { return 1; } void printChromaticNumber(std::ostream& os) const { diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index aed9dbad..601fdc24 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -161,6 +161,14 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) apply_Hloc_tm_.stop(); } +template +void Hamiltonian::applyDeltaPot(const T& phi, T& hphi) +{ + const std::vector& dv(pot_->dv()); + + phi.applyDiagonalOp(dv, hphi); +} + // add to hij the elements // corresponding to the local part of the Hamiltonian template <> diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h index 0c87ceda..7263dff6 100644 --- a/src/Hamiltonian.h +++ b/src/Hamiltonian.h @@ -42,6 +42,11 @@ class Hamiltonian const OrbitalsType& applyLocal(OrbitalsType& phi, const bool force = false); void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); + /*! + * Apply potential difference dv to phi + */ + void applyDeltaPot(const OrbitalsType& phi, OrbitalsType& hphi); + template void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2, MatrixType& mat, const bool force); diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index a2cd525d..ee49497c 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -363,6 +363,12 @@ class LocGridOrbitals : public Orbitals << std::endl; } + void applyDiagonalOp( + const std::vector& v, LocGridOrbitals& hphi) const + { + block_vector_.applyDiagonalOp(v, hphi.block_vector_); + } + void scal(const double alpha) { block_vector_.scal(alpha); diff --git a/src/MGmol.cc b/src/MGmol.cc index a6af740e..cf57a746 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -902,8 +902,12 @@ void MGmol::printTimers() proj_matrices_->printTimers(os_); ShortSightedInverse::printTimers(os_); if (std::is_same>::value) + { MVPSolver, dist_matrix::DistMatrix>::printTimers(os_); + MVPSolver, + ReplicatedMatrix>::printTimers(os_); + } VariableSizeMatrixInterface::printTimers(os_); DataDistribution::printTimers(os_); PackedCommunicationBuffer::printTimers(os_); diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 3be7fa10..97bcc825 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -213,6 +213,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) OrbitalsType hphi("MVP_hphi", orbitals); + MatrixType h11(h11_nl); + for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++) { if (onpe0 && ct.verbose > 1) @@ -240,8 +242,14 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components - MatrixType h11(h11_nl); - hamiltonian_->applyLocal(numst_, orbitals, hphi); + if (inner_it == 0) + { + hamiltonian_->applyLocal(numst_, orbitals, hphi); + } + else + { + hamiltonian_->applyDeltaPot(orbitals, hphi); + } orbitals.addDotWithNcol2Matrix(hphi, h11); current_proj_mat->assignH(h11); @@ -320,8 +328,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) energy_->saveVofRho(); // update h11 - h11 = h11_nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); proj_mat_work_->assignH(h11); diff --git a/src/Potentials.cc b/src/Potentials.cc index 4da9ab9a..2fd556f4 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -155,6 +155,7 @@ double Potentials::updateVtot(const std::vector>& rho) double minus = -1.; LinearAlgebraUtils::MPaxpy( size_, minus, &vtot_[0], &dv_[0]); + LinearAlgebraUtils::MPscal(size_, minus, &dv_[0]); evalNormDeltaVtotRho(rho); diff --git a/src/Potentials.h b/src/Potentials.h index c7062234..f5dc4636 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -155,6 +155,8 @@ class Potentials POTDTYPE* vtot() { return vtot_.data(); } RHODTYPE* rho_comp() { return rho_comp_.data(); } + const std::vector& dv() { return dv_; } + const std::vector& vnuc() const { return v_nuc_; } const std::vector& vh_rho() const { return vh_rho_; }