diff --git a/source/source_pw/module_pwdft/kernels/cuda/mul_potential_op.cu b/source/source_pw/module_pwdft/kernels/cuda/mul_potential_op.cu index ad148deb23..1724a5e2eb 100644 --- a/source/source_pw/module_pwdft/kernels/cuda/mul_potential_op.cu +++ b/source/source_pw/module_pwdft/kernels/cuda/mul_potential_op.cu @@ -37,7 +37,7 @@ struct mul_potential_op, base_device::DEVICE_GPU> int num_blocks = (npw + threads_per_block - 1) / threads_per_block; mul_potential_kernel<<>>( - pot + ik * nks * npw + iq * npw, + pot, reinterpret_cast*>(density_recip), npw); diff --git a/source/source_pw/module_pwdft/kernels/mul_potential_op.cpp b/source/source_pw/module_pwdft/kernels/mul_potential_op.cpp index 85647e065d..53cf7a6ff3 100644 --- a/source/source_pw/module_pwdft/kernels/mul_potential_op.cpp +++ b/source/source_pw/module_pwdft/kernels/mul_potential_op.cpp @@ -14,8 +14,8 @@ struct mul_potential_op, base_device::DEVICE_CPU> #endif for (int ig = 0; ig < npw; ig++) { - int ig_kq = ik * nks * npw + iq * npw + ig; - density_recip[ig] *= pot[ig_kq]; + // int ig_kq = ik * nks * npw + iq * npw + ig; + density_recip[ig] *= pot[ig]; } } diff --git a/source/source_pw/module_pwdft/operator_pw/exx_pw_pot.cpp b/source/source_pw/module_pwdft/operator_pw/exx_pw_pot.cpp index 6019ac110c..89918e16c8 100644 --- a/source/source_pw/module_pwdft/operator_pw/exx_pw_pot.cpp +++ b/source/source_pw/module_pwdft/operator_pw/exx_pw_pot.cpp @@ -11,7 +11,10 @@ void get_exx_potential(const K_Vectors* kv, Real* pot, double tpiba, bool gamma_extrapolation, - double ucell_omega) + double ucell_omega, + int ik, + int iq, + bool is_stress) { using setmem_real_cpu_op = base_device::memory::set_memory_op; using syncmem_real_c2d_op = base_device::memory::synchronize_memory_op; @@ -23,180 +26,168 @@ void get_exx_potential(const K_Vectors* kv, Real* pot_cpu = nullptr; int nks = wfcpw->nks, npw = rhopw_dev->npw; double tpiba2 = tpiba * tpiba; - pot_cpu = new Real[npw * wfcpw->nks * wfcpw->nks]; + pot_cpu = new Real[npw]; // fill zero - setmem_real_cpu_op()(pot_cpu, 0, npw * nks * nks); + setmem_real_cpu_op()(pot_cpu, 0, npw); // calculate Fock pot auto param_fock = GlobalC::exx_info.info_global.coulomb_param[Conv_Coulomb_Pot_K::Coulomb_Type::Fock]; - for (auto param: param_fock) + for (int i = 0; i < param_fock.size(); i++) { - double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Fock, - 0.0, - kv, - wfcpw, - rhopw_dev, - tpiba, - gamma_extrapolation, - ucell_omega); + auto param = param_fock[i]; + double exx_div = OperatorEXXPW, Device>::fock_div[i]; double alpha = std::stod(param["alpha"]); - for (int ik = 0; ik < nks; ik++) - { - for (int iq = 0; iq < nks; iq++) - { - const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; - const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; - const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; - const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; + const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; + const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; + const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; + const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < rhopw_dev->npw; ig++) + for (int ig = 0; ig < rhopw_dev->npw; ig++) + { + const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; + const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; + // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) + // 7/8 of the points in the grid are "activated" and 1/8 are disabled. + // grid_factor is designed for the 7/8 of the grid to function like all of the points + Real grid_factor = 1; + double extrapolate_grid = 8.0 / 7.0; + if (gamma_extrapolation) + { + // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) + auto isint = [](double x) { + double epsilon = 1e-6; // this follows the isint judgement in q-e + return std::abs(x - std::round(x)) < epsilon; + }; + if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) { - const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; - const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; - // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) - // 7/8 of the points in the grid are "activated" and 1/8 are disabled. - // grid_factor is designed for the 7/8 of the grid to function like all of the points - Real grid_factor = 1; - double extrapolate_grid = 8.0 / 7.0; - if (gamma_extrapolation) - { - // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) - auto isint = [](double x) { - double epsilon = 1e-6; // this follows the isint judgement in q-e - return std::abs(x - std::round(x)) < epsilon; - }; - if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) - { - grid_factor = 0; - } - else - { - grid_factor = extrapolate_grid; - } - } - - const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; - const int nk = nks / nk_fac; - const int ig_kq = ik * nks * npw + iq * npw + ig; - - Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; - // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 - if (gg >= 1e-8) - { - Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; - pot_cpu[ig_kq] += fac * grid_factor * alpha; - } - // } - else - { - pot_cpu[ig_kq] += exx_div * alpha; - } - // assert(is_finite(density_recip[ig])); + grid_factor = 0; } + else + { + grid_factor = extrapolate_grid; + } + } + + const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; + const int nk = nks / nk_fac; + + Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; + // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 + if (gg >= 1e-8) + { + Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; + pot_cpu[ig] += fac * grid_factor * alpha; } + // } + else + { + pot_cpu[ig] += exx_div * alpha; + } + // assert(is_finite(density_recip[ig])); } } // calculate erfc pot auto param_erfc = GlobalC::exx_info.info_global.coulomb_param[Conv_Coulomb_Pot_K::Coulomb_Type::Erfc]; - for (auto param: param_erfc) + for (int i = 0; i < param_erfc.size(); i++) { + auto param = param_erfc[i]; double erfc_omega = std::stod(param["omega"]); double erfc_omega2 = erfc_omega * erfc_omega; double alpha = std::stod(param["alpha"]); + // double exx_div = OperatorEXXPW, Device>::erfc_div[i]; double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Erfc, - erfc_omega, - kv, - wfcpw, - rhopw_dev, - tpiba, - gamma_extrapolation, - ucell_omega); - for (int ik = 0; ik < nks; ik++) - { - for (int iq = 0; iq < nks; iq++) - { - const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; - const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; - const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; - const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; + erfc_omega, + kv, + wfcpw, + rhopw_dev, + tpiba, + gamma_extrapolation, + ucell_omega); + const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; + const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; + const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; + const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < rhopw_dev->npw; ig++) + for (int ig = 0; ig < rhopw_dev->npw; ig++) + { + const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; + const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; + // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) + // 7/8 of the points in the grid are "activated" and 1/8 are disabled. + // grid_factor is designed for the 7/8 of the grid to function like all of the points + Real grid_factor = 1; + double extrapolate_grid = 8.0 / 7.0; + if (gamma_extrapolation) + { + // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) + auto isint = [](double x) { + double epsilon = 1e-6; // this follows the isint judgement in q-e + return std::abs(x - std::round(x)) < epsilon; + }; + if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) + { + grid_factor = 0; + } + else + { + grid_factor = extrapolate_grid; + } + } + + const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; + const int nk = nks / nk_fac; + // const int ig_kq = ik * nks * npw + iq * npw + ig; + + Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; + // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 + if (gg >= 1e-8) + { + Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; + pot_cpu[ig] += fac * (1.0 - std::exp(-gg / 4.0 / erfc_omega2)) * grid_factor * alpha; + } + // } + else + { + // if (PARAM.inp.dft_functional == "hse") + if (!gamma_extrapolation) { - const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; - const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; - // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) - // 7/8 of the points in the grid are "activated" and 1/8 are disabled. - // grid_factor is designed for the 7/8 of the grid to function like all of the points - Real grid_factor = 1; - double extrapolate_grid = 8.0 / 7.0; - if (gamma_extrapolation) - { - // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) - auto isint = [](double x) { - double epsilon = 1e-6; // this follows the isint judgement in q-e - return std::abs(x - std::round(x)) < epsilon; - }; - if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) - { - grid_factor = 0; - } - else - { - grid_factor = extrapolate_grid; - } - } - - const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; - const int nk = nks / nk_fac; - const int ig_kq = ik * nks * npw + iq * npw + ig; - - Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; - // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 - if (gg >= 1e-8) - { - Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; - pot_cpu[ig_kq] += fac * (1.0 - std::exp(-gg / 4.0 / erfc_omega2)) * grid_factor * alpha; - } - // } + if (is_stress) + pot_cpu[ig] += (- ModuleBase::PI * ModuleBase::e2 / erfc_omega2) * alpha; else - { - // if (PARAM.inp.dft_functional == "hse") - if (!gamma_extrapolation) - { - pot_cpu[ig_kq] += (exx_div - ModuleBase::PI * ModuleBase::e2 / erfc_omega2) * alpha; - } - else - { - pot_cpu[ig_kq] += exx_div * alpha; - } - } - // assert(is_finite(density_recip[ig])); + pot_cpu[ig] += (exx_div - ModuleBase::PI * ModuleBase::e2 / erfc_omega2) * alpha; + } + else + { + pot_cpu[ig] += exx_div * alpha; } } + // assert(is_finite(density_recip[ig])); } } // copy the potential to the device memory - syncmem_real_c2d_op()(pot, pot_cpu, rhopw_dev->npw * wfcpw->nks * wfcpw->nks); + syncmem_real_c2d_op()(pot, pot_cpu, rhopw_dev->npw); delete pot_cpu; } template void get_exx_stress_potential(const K_Vectors* kv, - const ModulePW::PW_Basis_K* wfcpw, - ModulePW::PW_Basis* rhopw_dev, - Real* pot, - double tpiba, - bool gamma_extrapolation, - double ucell_omega) + const ModulePW::PW_Basis_K* wfcpw, + ModulePW::PW_Basis* rhopw_dev, + Real* pot, + double tpiba, + bool gamma_extrapolation, + double ucell_omega, + int ik, + int iq) { using setmem_real_cpu_op = base_device::memory::set_memory_op; using syncmem_real_c2d_op = base_device::memory::synchronize_memory_op; @@ -208,74 +199,69 @@ void get_exx_stress_potential(const K_Vectors* kv, Real* pot_cpu = nullptr; int nks = wfcpw->nks, npw = rhopw_dev->npw; double tpiba2 = tpiba * tpiba; - pot_cpu = new Real[npw * wfcpw->nks * wfcpw->nks]; + pot_cpu = new Real[npw]; // fill zero - setmem_real_cpu_op()(pot_cpu, 0, npw * nks * nks); + setmem_real_cpu_op()(pot_cpu, 0, npw); // calculate Fock pot auto param_fock = GlobalC::exx_info.info_global.coulomb_param[Conv_Coulomb_Pot_K::Coulomb_Type::Fock]; for (auto param: param_fock) { - double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Fock, - 0.0, - kv, - wfcpw, - rhopw_dev, - tpiba, - gamma_extrapolation, - ucell_omega); + // double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Fock, + // 0.0, + // kv, + // wfcpw, + // rhopw_dev, + // tpiba, + // gamma_extrapolation, + // ucell_omega); double alpha = std::stod(param["alpha"]); - for (int ik = 0; ik < nks; ik++) - { - for (int iq = 0; iq < nks; iq++) - { - const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; - const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; - const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; - const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; + + const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; + const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; + const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; + const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < rhopw_dev->npw; ig++) + for (int ig = 0; ig < rhopw_dev->npw; ig++) + { + const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; + const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; + // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) + // 7/8 of the points in the grid are "activated" and 1/8 are disabled. + // grid_factor is designed for the 7/8 of the grid to function like all of the points + Real grid_factor = 1; + double extrapolate_grid = 8.0 / 7.0; + if (gamma_extrapolation) + { + // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) + auto isint = [](double x) { + double epsilon = 1e-6; // this follows the isint judgement in q-e + return std::abs(x - std::round(x)) < epsilon; + }; + if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) { - const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; - const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; - // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) - // 7/8 of the points in the grid are "activated" and 1/8 are disabled. - // grid_factor is designed for the 7/8 of the grid to function like all of the points - Real grid_factor = 1; - double extrapolate_grid = 8.0 / 7.0; - if (gamma_extrapolation) - { - // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) - auto isint = [](double x) { - double epsilon = 1e-6; // this follows the isint judgement in q-e - return std::abs(x - std::round(x)) < epsilon; - }; - if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) - { - grid_factor = 0; - } - else - { - grid_factor = extrapolate_grid; - } - } - - const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; - const int nk = nks / nk_fac; - const int ig_kq = ik * nks * npw + iq * npw + ig; - - Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; - // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 - if (gg >= 1e-8) - { - Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; - pot_cpu[ig_kq] += 1.0 / gg * grid_factor * alpha; - } + grid_factor = 0; + } + else + { + grid_factor = extrapolate_grid; } } + + const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; + const int nk = nks / nk_fac; + // const int ig_kq = ik * nks * npw + iq * npw + ig; + + Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; + // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 + if (gg >= 1e-8) + { + Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; + pot_cpu[ig] += 1.0 / gg * grid_factor * alpha; + } } } @@ -286,80 +272,76 @@ void get_exx_stress_potential(const K_Vectors* kv, double erfc_omega = std::stod(param["omega"]); double erfc_omega2 = erfc_omega * erfc_omega; double alpha = std::stod(param["alpha"]); - double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Erfc, - erfc_omega, - kv, - wfcpw, - rhopw_dev, - tpiba, - gamma_extrapolation, - ucell_omega); - for (int ik = 0; ik < nks; ik++) - { - for (int iq = 0; iq < nks; iq++) - { - const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; - const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; - const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; - const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; + // double exx_div = exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Erfc, + // erfc_omega, + // kv, + // wfcpw, + // rhopw_dev, + // tpiba, + // gamma_extrapolation, + // ucell_omega); + + const ModuleBase::Vector3 k_c = wfcpw->kvec_c[ik]; + const ModuleBase::Vector3 k_d = wfcpw->kvec_d[ik]; + const ModuleBase::Vector3 q_c = wfcpw->kvec_c[iq]; + const ModuleBase::Vector3 q_d = wfcpw->kvec_d[iq]; #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int ig = 0; ig < rhopw_dev->npw; ig++) + for (int ig = 0; ig < rhopw_dev->npw; ig++) + { + const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; + const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; + // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) + // 7/8 of the points in the grid are "activated" and 1/8 are disabled. + // grid_factor is designed for the 7/8 of the grid to function like all of the points + Real grid_factor = 1; + double extrapolate_grid = 8.0 / 7.0; + if (gamma_extrapolation) + { + // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) + auto isint = [](double x) { + double epsilon = 1e-6; // this follows the isint judgement in q-e + return std::abs(x - std::round(x)) < epsilon; + }; + if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) { - const ModuleBase::Vector3 g_d = rhopw_dev->gdirect[ig]; - const ModuleBase::Vector3 kqg_d = k_d - q_d + g_d; - // For gamma_extrapolation (https://doi.org/10.1103/PhysRevB.79.205114) - // 7/8 of the points in the grid are "activated" and 1/8 are disabled. - // grid_factor is designed for the 7/8 of the grid to function like all of the points - Real grid_factor = 1; - double extrapolate_grid = 8.0 / 7.0; - if (gamma_extrapolation) - { - // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3) - auto isint = [](double x) { - double epsilon = 1e-6; // this follows the isint judgement in q-e - return std::abs(x - std::round(x)) < epsilon; - }; - if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)) - { - grid_factor = 0; - } - else - { - grid_factor = extrapolate_grid; - } - } - - const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; - const int nk = nks / nk_fac; - const int ig_kq = ik * nks * npw + iq * npw + ig; - - Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; - // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 - if (gg >= 1e-8) - { - Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; - pot_cpu[ig_kq] += (1.0 - (1.0 + gg / 4.0 / erfc_omega2) * std::exp(-gg / 4.0 / erfc_omega2)) / (1.0 - std::exp(-gg / 4.0 / erfc_omega2)) / gg * grid_factor * alpha; - } - // } - else - { - // if (PARAM.inp.dft_functional == "hse") - if (!gamma_extrapolation) - { - pot_cpu[ig_kq] += 1.0 / 4.0 / erfc_omega2 * alpha; - } - } - // assert(is_finite(density_recip[ig])); + grid_factor = 0; + } + else + { + grid_factor = extrapolate_grid; } } + + const int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; + const int nk = nks / nk_fac; + // const int ig_kq = ik * nks * npw + iq * npw + ig; + + Real gg = (k_c - q_c + rhopw_dev->gcar[ig]).norm2() * tpiba2; + // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2 + if (gg >= 1e-8) + { + Real fac = -ModuleBase::FOUR_PI * ModuleBase::e2 / gg; + pot_cpu[ig] += (1.0 - (1.0 + gg / 4.0 / erfc_omega2) * std::exp(-gg / 4.0 / erfc_omega2)) + / (1.0 - std::exp(-gg / 4.0 / erfc_omega2)) / gg * grid_factor * alpha; + } + // } + else + { + // if (PARAM.inp.dft_functional == "hse") + if (!gamma_extrapolation) + { + pot_cpu[ig] += 1.0 / 4.0 / erfc_omega2 * alpha; + } + } + // assert(is_finite(density_recip[ig])); } } // copy the potential to the device memory - syncmem_real_c2d_op()(pot, pot_cpu, rhopw_dev->npw * wfcpw->nks * wfcpw->nks); + syncmem_real_c2d_op()(pot, pot_cpu, rhopw_dev->npw); delete pot_cpu; } @@ -495,28 +477,38 @@ template void get_exx_potential(const K_Vectors* float*, double, bool, - double); + double, + int, + int, + bool); template void get_exx_potential(const K_Vectors*, const ModulePW::PW_Basis_K*, ModulePW::PW_Basis*, double*, double, bool, - double); + double, + int, + int, + bool); template void get_exx_stress_potential(const K_Vectors*, - const ModulePW::PW_Basis_K*, - ModulePW::PW_Basis*, - float*, - double, - bool, - double); + const ModulePW::PW_Basis_K*, + ModulePW::PW_Basis*, + float*, + double, + bool, + double, + int, + int); template void get_exx_stress_potential(const K_Vectors*, - const ModulePW::PW_Basis_K*, - ModulePW::PW_Basis*, - double*, - double, - bool, - double); + const ModulePW::PW_Basis_K*, + ModulePW::PW_Basis*, + double*, + double, + bool, + double, + int, + int); #if ((defined __CUDA) || (defined __ROCM)) template class OperatorEXXPW, base_device::DEVICE_GPU>; template class OperatorEXXPW, base_device::DEVICE_GPU>; @@ -526,27 +518,37 @@ template void get_exx_potential(const K_Vectors* float*, double, bool, - double); + double, + int, + int, + bool); template void get_exx_potential(const K_Vectors*, const ModulePW::PW_Basis_K*, ModulePW::PW_Basis*, double*, double, bool, - double); + double, + int, + int, + bool); template void get_exx_stress_potential(const K_Vectors*, - const ModulePW::PW_Basis_K*, - ModulePW::PW_Basis*, - float*, - double, - bool, - double); + const ModulePW::PW_Basis_K*, + ModulePW::PW_Basis*, + float*, + double, + bool, + double, + int, + int); template void get_exx_stress_potential(const K_Vectors*, - const ModulePW::PW_Basis_K*, - ModulePW::PW_Basis*, - double*, - double, - bool, - double); + const ModulePW::PW_Basis_K*, + ModulePW::PW_Basis*, + double*, + double, + bool, + double, + int, + int); #endif } // namespace hamilt \ No newline at end of file diff --git a/source/source_pw/module_pwdft/operator_pw/op_exx_pw.cpp b/source/source_pw/module_pwdft/operator_pw/op_exx_pw.cpp index 83879d634f..404c660877 100644 --- a/source/source_pw/module_pwdft/operator_pw/op_exx_pw.cpp +++ b/source/source_pw/module_pwdft/operator_pw/op_exx_pw.cpp @@ -23,6 +23,11 @@ namespace hamilt { +template +std::vector::type> OperatorEXXPW::fock_div = {}; + +template +std::vector::type> OperatorEXXPW::erfc_div = {}; template OperatorEXXPW::OperatorEXXPW(const int* isk_in, @@ -66,7 +71,7 @@ OperatorEXXPW::OperatorEXXPW(const int* isk_in, int nks = wfcpw->nks; int nk_fac = PARAM.inp.nspin == 2 ? 2 : 1; - resmem_real_op()(pot, rhopw->npw * nks * nks); + resmem_real_op()(pot, rhopw->npw); tpiba = ucell->tpiba; Real tpiba2 = tpiba * tpiba; @@ -83,6 +88,31 @@ OperatorEXXPW::OperatorEXXPW(const int* isk_in, rhopw_dev->setuptransform(); rhopw_dev->collect_local_pw(); + auto param_fock = GlobalC::exx_info.info_global.coulomb_param[Conv_Coulomb_Pot_K::Coulomb_Type::Fock]; + for (auto param: param_fock) + { + fock_div.push_back(exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Fock, + 0.0, + kv, + wfcpw, + rhopw_dev, + tpiba, + gamma_extrapolation, + ucell->omega)); + } + auto param_erfc = GlobalC::exx_info.info_global.coulomb_param[Conv_Coulomb_Pot_K::Coulomb_Type::Erfc]; + for (auto param: param_erfc) + { + erfc_div.push_back(exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type::Erfc, + std::stod(param["omega"]), + kv, + wfcpw, + rhopw_dev, + tpiba, + gamma_extrapolation, + ucell->omega)); + } + } // end of constructor template @@ -168,11 +198,7 @@ void OperatorEXXPW::act_op(const int nbands, // << " ngk_ik: " << ngk_ik // << " is_first_node: " << is_first_node // << std::endl; - if (!potential_got) - { - get_exx_potential(kv, wfcpw, rhopw_dev, pot, tpiba, gamma_extrapolation, ucell->omega); - potential_got = true; - } + // get_exx_potential(kv, wfcpw, rhopw_dev, pot, tpiba, gamma_extrapolation, ucell->omega, ik, iq); // set_psi(&p_exx_helper->psi); @@ -199,6 +225,7 @@ void OperatorEXXPW::act_op(const int nbands, Real nqs = q_points.size(); for (int iq: q_points) { + get_exx_potential(kv, wfcpw, rhopw_dev, pot, tpiba, gamma_extrapolation, ucell->omega, this->ik, iq); // std::cout << "ik" << this->ik << " iq" << iq << std::endl; for (int m_iband = 0; m_iband < psi.get_nbands(); m_iband++) { @@ -421,6 +448,7 @@ double OperatorEXXPW::cal_exx_energy_op(psi::Psi *ppsi_) c for (int iq: q_points) { + get_exx_potential(kv, wfcpw, rhopw_dev, pot, tpiba, gamma_extrapolation, ucell->omega, ik, iq); for (int m_iband = 0; m_iband < psi.get_nbands(); m_iband++) { // double wg_f = GlobalC::exx_helper.wg(iq, m_iband); @@ -441,7 +469,7 @@ double OperatorEXXPW::cal_exx_energy_op(psi::Psi *ppsi_) c int nks = wfcpw->nks; int npw = rhopw_dev->npw; int nk = nks / nk_fac; - Eexx_ik_real += exx_cal_energy_op()(density_recip, pot + ik * nks * npw + iq * npw, wg_iqb_real / nqs * wg_ikb_real / kv->wk[ik], npw); + Eexx_ik_real += exx_cal_energy_op()(density_recip, pot, wg_iqb_real / nqs * wg_ikb_real / kv->wk[ik], npw); } // m_iband diff --git a/source/source_pw/module_pwdft/operator_pw/op_exx_pw.h b/source/source_pw/module_pwdft/operator_pw/op_exx_pw.h index 577db11b04..d823eda015 100644 --- a/source/source_pw/module_pwdft/operator_pw/op_exx_pw.h +++ b/source/source_pw/module_pwdft/operator_pw/op_exx_pw.h @@ -55,13 +55,14 @@ class OperatorEXXPW : public OperatorPW bool first_iter = true; + static std::vector fock_div, erfc_div; + private: const int* isk = nullptr; const ModulePW::PW_Basis_K* wfcpw = nullptr; const ModulePW::PW_Basis* rhopw = nullptr; ModulePW::PW_Basis* rhopw_dev = nullptr; // for device const UnitCell *ucell = nullptr; -// Real exx_div = 0; Real tpiba = 0; std::vector get_q_points(const int ik) const; @@ -167,7 +168,10 @@ void get_exx_potential(const K_Vectors* kv, Real* pot, double tpiba, bool gamma_extrapolation, - double ucell_omega); + double ucell_omega, + int ik, + int iq, + bool is_stress = false); template void get_exx_stress_potential(const K_Vectors* kv, @@ -176,7 +180,9 @@ void get_exx_stress_potential(const K_Vectors* kv, Real* pot, double tpiba, bool gamma_extrapolation, - double ucell_omega); + double ucell_omega, + int ik, + int iq); double exx_divergence(Conv_Coulomb_Pot_K::Coulomb_Type coulomb_type, double erfc_omega, diff --git a/source/source_pw/module_pwdft/stress_func_exx.cpp b/source/source_pw/module_pwdft/stress_func_exx.cpp index 4aef3c872f..dd2c6570a0 100644 --- a/source/source_pw/module_pwdft/stress_func_exx.cpp +++ b/source/source_pw/module_pwdft/stress_func_exx.cpp @@ -1,5 +1,6 @@ #include "global.h" #include "operator_pw/op_exx_pw.h" +#include "source_base/parallel_common.h" #include "stress_pw.h" template @@ -10,19 +11,15 @@ void Stress_PW::stress_exx(ModuleBase::matrix& sigma, const K_Vectors *p_kv, const psi::Psi , Device>* d_psi_in, const UnitCell& ucell) { - double nqs_half1 = 0.5 * p_kv->nmp[0]; - double nqs_half2 = 0.5 * p_kv->nmp[1]; - double nqs_half3 = 0.5 * p_kv->nmp[2]; bool gamma_extrapolation = PARAM.inp.exx_gamma_extrapolation; - if (!p_kv->get_is_mp()) + bool is_mp = p_kv->get_is_mp(); +#ifdef __MPI + Parallel_Common::bcast_bool(is_mp); +#endif + if (!is_mp) { gamma_extrapolation = false; } - auto isint = [](double x) - { - double epsilon = 1e-6; // this follows the isint judgement in q-e - return std::abs(x - std::round(x)) < epsilon; - }; // T is complex of FPTYPE, if FPTYPE is double, T is std::complex // but if FPTYPE is std::complex, T is still std::complex @@ -54,11 +51,11 @@ void Stress_PW::stress_exx(ModuleBase::matrix& sigma, resmem_complex_op()(psi_mq_real, wfcpw->nrxx); resmem_complex_op()(density_real, rhopw->nrxx); resmem_complex_op()(density_recip, rhopw->npw); - resmem_real_op()(pot, rhopw->npw * nks * nks); - resmem_real_op()(pot_stress, rhopw->npw * nks * nks); + resmem_real_op()(pot, rhopw->npw); + resmem_real_op()(pot_stress, rhopw->npw); - hamilt::get_exx_potential(p_kv, wfcpw, rhopw, pot, tpiba, gamma_extrapolation, omega); - hamilt::get_exx_stress_potential(p_kv, wfcpw, rhopw, pot_stress, tpiba, gamma_extrapolation, omega); + // hamilt::get_exx_potential(p_kv, wfcpw, rhopw, pot, tpiba, gamma_extrapolation, omega); + // hamilt::get_exx_stress_potential(p_kv, wfcpw, rhopw, pot_stress, tpiba, gamma_extrapolation, omega); // calculate the stress @@ -75,6 +72,8 @@ void Stress_PW::stress_exx(ModuleBase::matrix& sigma, for (int iq = 0; iq < nqs; iq++) { + hamilt::get_exx_potential(p_kv, wfcpw, rhopw, pot, tpiba, gamma_extrapolation, omega, ik, iq, true); + hamilt::get_exx_stress_potential(p_kv, wfcpw, rhopw, pot_stress, tpiba, gamma_extrapolation, omega, ik, iq); for (int mband = 0; mband < d_psi_in->get_nbands(); mband++) { // psi_mq in real space @@ -111,7 +110,7 @@ void Stress_PW::stress_exx(ModuleBase::matrix& sigma, double kqg_beta = kqg[beta] * tpiba; // equation 10 of 10.1103/PhysRevB.73.125120 double density_recip2 = std::real(density_recip[ig] * std::conj(density_recip[ig])); - const int idx = ig + iq * rhopw->npw + ik * rhopw->npw * nqs; + const int idx = ig; double pot_local = pot[idx]; double pot_stress_local = pot_stress[idx]; sigma_ab_loc += density_recip2 * pot_local * (kqg_alpha * kqg_beta * pot_stress_local - delta_ab) ;