diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp index c25a12df77..4b6132cac4 100644 --- a/source/module_basis/module_ao/parallel_orbitals.cpp +++ b/source/module_basis/module_ao/parallel_orbitals.cpp @@ -91,16 +91,6 @@ void Parallel_Orbitals::set_atomic_trace(const int* iat2iwt, const int &nat, con this->atom_begin_col[nat] = this->ncol; } -// Get the number of columns of the parallel orbital matrix -int Parallel_Orbitals::get_col_size()const -{ - return this->ncol; -} -// Get the number of rows of the parallel orbital matrix -int Parallel_Orbitals::get_row_size()const -{ - return this->nrow; -} // Get the number of columns of the orbital matrix of the iat-th atom int Parallel_Orbitals::get_col_size(int iat) const { diff --git a/source/module_basis/module_ao/parallel_orbitals.h b/source/module_basis/module_ao/parallel_orbitals.h index fb052de4e0..77f094e361 100644 --- a/source/module_basis/module_ao/parallel_orbitals.h +++ b/source/module_basis/module_ao/parallel_orbitals.h @@ -73,8 +73,8 @@ class Parallel_Orbitals : public Parallel_2D * get_col_size(iat) : number of columns of Hamiltonian matrix in atom iat * get_row_size(iat) : number of rows of Hamiltonian matrix in atom iat */ - int get_col_size()const; - int get_row_size()const; + int get_col_size()const { return this->ncol; }; + int get_row_size()const { return this->nrow; }; int get_col_size(int iat) const; int get_row_size(int iat) const; diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 56475e6840..4521071d65 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -64,24 +64,27 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); int ld_hk = this->_paraV->nrow; - int ld_hk2 = 2 * ld_hk; for (int is = 1; is <= this->_nspin; ++is) { int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; + hamilt::HContainer* target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int mat_size = row_size * col_size; + const int r_size = target_ap.get_R_size(); if (row_ap == -1 || col_ap == -1) { throw std::string("Atom-pair not belong this process"); @@ -89,131 +92,126 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) std::vector> tmp_DMR; if (PARAM.inp.nspin == 4) { - tmp_DMR.resize(tmp_ap.get_size()); + tmp_DMR.resize(mat_size * r_size, 0); } - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec(r_size * this->_nk); + std::vector target_DMR_mat_vec(r_size); + for(int ir = 0; ir < r_size; ++ir) { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); + hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif - // loop over k-points - if (PARAM.inp.nspin != 4) + target_DMR_mat_vec[ir] = target_mat->get_pointer(); + for(int ik = 0; ik < this->_nk; ++ik) + { + if(ik_in >= 0 && ik_in != ik) + { + continue; + } + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + } + } + + std::vector> tmp_DMK_mat(mat_size); + // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 + // step_trace is used when nspin = 4; + int step_trace[4]{}; + if(PARAM.inp.nspin == 4) + { + const int npol = 2; + for (int is = 0; is < npol; is++) { - for (int ik = 0; ik < this->_nk; ++ik) + for (int is2 = 0; is2 < npol; is2++) { - if(ik_in >= 0 && ik_in != ik) { continue; -} - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - double* tmp_DMR_pointer = tmp_matrix->get_pointer(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) - { - DMK_real_pointer = (double*)tmp_DMK_pointer; - DMK_imag_pointer = DMK_real_pointer + 1; - BlasConnector::axpy(this->_paraV->get_col_size(iat2), - kphase.real(), - DMK_real_pointer, - ld_hk2, - tmp_DMR_pointer, - 1); - // "-" since i^2 = -1 - BlasConnector::axpy(this->_paraV->get_col_size(iat2), - -kphase.imag(), - DMK_imag_pointer, - ld_hk2, - tmp_DMR_pointer, - 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); - } + step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; } } + } + for(int ik = 0; ik < this->_nk; ++ik) + { + if(ik_in >= 0 && ik_in != ik) + { + continue; + } - // treat DMR as pauli matrix when NSPIN=4 - if (PARAM.inp.nspin == 4) + // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) + const std::complex* DMK_mat_ptr = this->_DMK[ik + ik_begin].data() + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) { - tmp_DMR.assign(tmp_ap.get_size(), std::complex(0.0, 0.0)); - for (int ik = 0; ik < this->_nk; ++ik) + for(int irow = 0; irow < row_size; ++irow) { - if(ik_in >= 0 && ik_in != ik) { continue; -} - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - std::complex* tmp_DMR_pointer = tmp_DMR.data(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < tmp_ap.get_row_size(); ++mu) - { - BlasConnector::axpy(tmp_ap.get_col_size(), - kphase, - tmp_DMK_pointer, - ld_hk, - tmp_DMR_pointer, - 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += tmp_ap.get_col_size(); - } + tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; } - int npol = 2; - // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 - int step_trace[4]; - for (int is = 0; is < npol; is++) + } + + // if nspin != 4, fill DMR + // if nspin == 4, fill tmp_DMR + for(int ir = 0; ir < r_size; ++ir) + { + std::complex kphase = kphase_vec[ik * r_size + ir]; + if(PARAM.inp.nspin != 4) { - for (int is2 = 0; is2 < npol; is2++) + double* target_DMR_mat = target_DMR_mat_vec[ir]; + for(int i = 0; i < mat_size; i++) { - step_trace[is * npol + is2] = tmp_ap.get_col_size() * is + is2; + target_DMR_mat[i] += kphase.real() * tmp_DMK_mat[i].real() + - kphase.imag() * tmp_DMK_mat[i].imag(); } + } else if(PARAM.inp.nspin == 4) + { + std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; + BlasConnector::axpy(mat_size, + kphase, + tmp_DMK_mat.data(), + 1, + tmp_DMR_mat, + 1); } - std::complex tmp[4]; - double* target_DMR = tmp_matrix->get_pointer(); - std::complex* tmp_DMR_pointer = tmp_DMR.data(); - for (int irow = 0; irow < tmp_ap.get_row_size(); irow += 2) + } + } + + // if nspin == 4 + // copy tmp_DMR to fill target_DMR + if(PARAM.inp.nspin == 4) + { + std::complex tmp[4]{}; + for(int ir = 0; ir < r_size; ++ir) + { + std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; + double* target_DMR_mat = target_DMR_mat_vec[ir]; + for (int irow = 0; irow < row_size; irow += 2) { - for (int icol = 0; icol < tmp_ap.get_col_size(); icol += 2) + for (int icol = 0; icol < col_size; icol += 2) { // catch the 4 spin component value of one orbital pair - tmp[0] = tmp_DMR_pointer[icol + step_trace[0]]; - tmp[1] = tmp_DMR_pointer[icol + step_trace[1]]; - tmp[2] = tmp_DMR_pointer[icol + step_trace[2]]; - tmp[3] = tmp_DMR_pointer[icol + step_trace[3]]; + tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; + tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; + tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; + tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part - // save them back to the tmp_matrix - target_DMR[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); - target_DMR[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); - target_DMR[icol + step_trace[2]] + // save them back to the target_mat + target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); + target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); + target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() - target_DMR[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); + target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); } - tmp_DMR_pointer += tmp_ap.get_col_size() * 2; - target_DMR += tmp_ap.get_col_size() * 2; + tmp_DMR_mat += col_size * 2; + target_DMR_mat += col_size * 2; } } } @@ -232,35 +230,41 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); int ld_hk = this->_paraV->nrow; - int ld_hk2 = 2 * ld_hk; - hamilt::HContainer>* tmp_DMR = dmR_out; + hamilt::HContainer>* target_DMR = dmR_out; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - auto& tmp_ap = tmp_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + auto& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int mat_size = row_size * col_size; + const int r_size = target_ap.get_R_size(); + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec(r_size * this->_nk); + std::vector*> target_DMR_mat_vec(r_size); + for(int ir = 0; ir < r_size; ++ir) { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - auto* tmp_matrix = tmp_ap.find_matrix(r_index); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); + hamilt::BaseMatrix>* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif - // loop over k-points - // calculate full matrix for complex density matrix - for (int ik = 0; ik < this->_nk; ++ik) + target_DMR_mat_vec[ir] = target_mat->get_pointer(); + for(int ik = 0; ik < this->_nk; ++ik) { // cal k_phase // if TK==std::complex, kphase is e^{ikR} @@ -268,27 +272,34 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - std::complex* tmp_DMR_pointer = tmp_matrix->get_pointer(); - const std::complex* tmp_DMK_pointer = this->_DMK[ik].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) + kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + } + } + + std::vector> tmp_DMK_mat(mat_size); + for(int ik = 0; ik < this->_nk; ++ik) + { + // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) + const std::complex* DMK_mat_ptr = this->_DMK[ik].data() + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) + { + for(int irow = 0; irow < row_size; ++irow) { - BlasConnector::axpy(this->_paraV->get_col_size(iat2), - kphase, - tmp_DMK_pointer, - ld_hk, - tmp_DMR_pointer, - 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); + tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; } } + + for(int ir = 0; ir < r_size; ++ir) + { + std::complex kphase = kphase_vec[ik * r_size + ir]; + std::complex* target_DMR_mat = target_DMR_mat_vec[ir]; + BlasConnector::axpy(mat_size, + kphase, + tmp_DMK_mat.data(), + 1, + target_DMR_mat, + 1); + } } } ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); @@ -312,60 +323,63 @@ void DensityMatrix::cal_DMR(const int ik_in) for (int is = 1; is <= this->_nspin; ++is) { int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; + hamilt::HContainer* target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef __DEBUG - // assert(tmp_DMR->is_gamma_only() == true); + // assert(target_DMR->is_gamma_only() == true); assert(this->_nk == 1); #endif #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int r_size = target_ap.get_R_size(); if (row_ap == -1 || col_ap == -1) { throw std::string("Atom-pair not belong this process"); } // R index - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(0); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(0); #ifdef __DEBUG - assert(tmp_ap.get_R_size() == 1); + assert(r_size == 1); assert(r_index.x == 0 && r_index.y == 0 && r_index.z == 0); #endif - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); + hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif // k index double kphase = 1; // set DMR element - double* tmp_DMR_pointer = tmp_matrix->get_pointer(); - double* tmp_DMK_pointer = this->_DMK[0 + ik_begin].data(); + double* target_DMR_ptr = target_mat->get_pointer(); + double* DMK_ptr = this->_DMK[0 + ik_begin].data(); // transpose DMK col=>row - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) + DMK_ptr += col_ap * this->_paraV->nrow + row_ap; + for (int mu = 0; mu < row_size; ++mu) { - BlasConnector::axpy(this->_paraV->get_col_size(iat2), + BlasConnector::axpy(col_size, kphase, - tmp_DMK_pointer, + DMK_ptr, ld_hk, - tmp_DMR_pointer, + target_DMR_ptr, 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); + DMK_ptr += 1; + target_DMR_ptr += col_size; } } } diff --git a/source/module_hamilt_lcao/module_hcontainer/atom_pair.h b/source/module_hamilt_lcao/module_hcontainer/atom_pair.h index ed6f205fa1..f0593641f9 100644 --- a/source/module_hamilt_lcao/module_hcontainer/atom_pair.h +++ b/source/module_hamilt_lcao/module_hcontainer/atom_pair.h @@ -118,6 +118,12 @@ class AtomPair */ void set_zero(); + /** + * @brief get begin index of this AtomPair + */ + int get_begin_row() const { return this->row_ap; } + int get_begin_col() const { return this->col_ap; } + /** * @brief get col_size for this AtomPair */ diff --git a/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp b/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp index af8f7c76d5..6d235b1d67 100644 --- a/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp +++ b/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp @@ -15,7 +15,7 @@ template void folding_HR(const hamilt::HContainer& hR, std::complex* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type) { #ifdef _OPENMP @@ -24,9 +24,18 @@ void folding_HR(const hamilt::HContainer& hR, for (int i = 0; i < hR.size_atom_pairs(); ++i) { hamilt::AtomPair& tmp = hR.get_atom_pair(i); - for(int ir = 0;ir < tmp.get_R_size(); ++ir ) + const int row_size = tmp.get_row_size(); + const int col_size = tmp.get_col_size(); + // copy hk to hk_type + // hk_tmp is row-major and stored contiguously in memory, + // so copy hr to hk_tmp is faster than copy hr to hk + std::vector> hk_mat_tmp(row_size * col_size, 0); + + // copy hr to hk_tmp + for(int ir = 0; ir < tmp.get_R_size(); ++ir) { const ModuleBase::Vector3 r_index = tmp.get_R_index(ir); + TR* hr_mat = tmp.get_pointer(ir); // cal k_phase // if TK==std::complex, kphase is e^{ikR} const ModuleBase::Vector3 dR(r_index.x, r_index.y, r_index.z); @@ -34,9 +43,35 @@ void folding_HR(const hamilt::HContainer& hR, double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); + + for(int i = 0; i < row_size * col_size; ++i) + { + hk_mat_tmp[i] += kphase * hr_mat[i]; + } + } - tmp.find_R(r_index); - tmp.add_to_matrix(hk, ncol, kphase, hk_type); + // copy hk_tmp to hk + if (hk_type == 0) + { + std::complex* hk_mat = hk + tmp.get_begin_row() * hk_ld + tmp.get_begin_col(); + for(int irow = 0; irow < row_size; ++irow) + { + for(int icol = 0; icol < col_size; ++icol) + { + hk_mat[irow * hk_ld + icol] += hk_mat_tmp[irow * col_size + icol]; + } + } + } + else if(hk_type == 1) + { + std::complex* hk_mat = hk + tmp.get_begin_col() * hk_ld + tmp.get_begin_row(); + for(int icol = 0; icol < col_size; ++icol) + { + for(int irow = 0; irow < row_size; ++irow) + { + hk_mat[icol * hk_ld + irow] += hk_mat_tmp[irow * col_size + icol]; + } + } } } /*for (int i = 0; i < hR.size_R_loop(); ++i) @@ -82,7 +117,7 @@ template void folding_HR(const hamilt::HContainer& hR, void folding_HR(const hamilt::HContainer& hR, double* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type) { // in ABACUS, this function works with gamma-only case. @@ -97,7 +132,7 @@ void folding_HR(const hamilt::HContainer& hR, double kphase = 1.0; // Hk = HR - hR.get_atom_pair(i).add_to_matrix(hk, ncol, kphase, hk_type); + hR.get_atom_pair(i).add_to_matrix(hk, hk_ld , kphase, hk_type); } } diff --git a/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h b/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h index 31bf65e3da..2243f05ba3 100644 --- a/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h +++ b/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h @@ -16,13 +16,13 @@ template void folding_HR(const hamilt::HContainer& hR, std::complex* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type); void folding_HR(const hamilt::HContainer& hR, double* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type); #ifdef __MPI