diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp index a22c87bcf7..5558856289 100644 --- a/source/module_elecstate/elecstate_pw.cpp +++ b/source/module_elecstate/elecstate_pw.cpp @@ -31,19 +31,26 @@ ElecStatePW::ElecStatePW(ModulePW::PW_Basis_K* wfc_basis_in, template ElecStatePW::~ElecStatePW() { - if (base_device::get_device_type(this->ctx) == base_device::GpuDevice) + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { delmem_var_op()(this->ctx, this->rho_data); + delete[] this->rho; + + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + delmem_complex_op()(this->ctx, this->rhog_data); + delete[] this->rhog; + } if (get_xc_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { delmem_var_op()(this->ctx, this->kin_r_data); + delete[] this->kin_r; } } - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - delete[] this->rho; - delete[] this->kin_r; + if (PARAM.globalv.use_uspp) + { + delmem_var_op()(this->ctx, this->becsum); } - delmem_var_op()(this->ctx, becsum); delmem_complex_op()(this->ctx, this->wfcr); delmem_complex_op()(this->ctx, this->wfcr_another_spin); } @@ -51,16 +58,28 @@ ElecStatePW::~ElecStatePW() template void ElecStatePW::init_rho_data() { - if(this->init_rho) { + if (this->init_rho) + { return; } - - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { + + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") + { this->rho = new Real*[this->charge->nspin]; resmem_var_op()(this->ctx, this->rho_data, this->charge->nspin * this->charge->nrxx); - for (int ii = 0; ii < this->charge->nspin; ii++) { + for (int ii = 0; ii < this->charge->nspin; ii++) + { this->rho[ii] = this->rho_data + ii * this->charge->nrxx; } + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + this->rhog = new T*[this->charge->nspin]; + resmem_complex_op()(this->ctx, this->rhog_data, this->charge->nspin * this->charge->rhopw->npw); + for (int ii = 0; ii < this->charge->nspin; ii++) + { + this->rhog[ii] = this->rhog_data + ii * this->charge->rhopw->npw; + } + } if (get_xc_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { this->kin_r = new Real*[this->charge->nspin]; @@ -70,8 +89,13 @@ void ElecStatePW::init_rho_data() } } } - else { + else + { this->rho = reinterpret_cast(this->charge->rho); + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + this->rhog = reinterpret_cast(this->charge->rhog); + } if (get_xc_func_type() == 3 || PARAM.inp.out_elf[0] > 0) { this->kin_r = reinterpret_cast(this->charge->kin_r); @@ -100,19 +124,24 @@ void ElecStatePW::psiToRho(const psi::Psi& psi) // ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx); setmem_var_op()(this->ctx, this->kin_r[is], 0, this->charge->nrxx); } - } + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) + { + setmem_complex_op()(this->ctx, this->rhog[is], 0, this->charge->rhopw->npw); + } + } for (int ik = 0; ik < psi.get_nk(); ++ik) { psi.fix_k(ik); this->updateRhoK(psi); } - if (PARAM.globalv.use_uspp) + + this->add_usrho(psi); + + if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - this->add_usrho(psi); - } - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { - for (int ii = 0; ii < PARAM.inp.nspin; ii++) { + for (int ii = 0; ii < PARAM.inp.nspin; ii++) + { castmem_var_d2h_op()(cpu_ctx, this->ctx, this->charge->rho[ii], this->rho[ii], this->charge->nrxx); if (get_xc_func_type() == 3) { @@ -397,32 +426,39 @@ void ElecStatePW::cal_becsum(const psi::Psi& psi) template void ElecStatePW::add_usrho(const psi::Psi& psi) { - this->cal_becsum(psi); + if (PARAM.globalv.use_uspp) + { + this->cal_becsum(psi); + } // transform soft charge to recip space using smooth grids - T* rhog = nullptr; - resmem_complex_op()(this->ctx, rhog, this->charge->rhopw->npw * PARAM.inp.nspin, "ElecState::rhog"); - setmem_complex_op()(this->ctx, rhog, 0, this->charge->rhopw->npw * PARAM.inp.nspin); - for (int is = 0; is < PARAM.inp.nspin; is++) + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { - this->rhopw_smooth->real2recip(this->rho[is], &rhog[is * this->charge->rhopw->npw]); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + this->rhopw_smooth->real2recip(this->rho[is], this->rhog[is]); + } } // \sum_lm Q_lm(r) \sum_i w_i // add to the charge density in reciprocal space the part which is due to the US augmentation. - this->addusdens_g(becsum, rhog); + if (PARAM.globalv.use_uspp) + { + this->addusdens_g(becsum, rhog); + } // transform back to real space using dense grids - for (int is = 0; is < PARAM.inp.nspin; is++) + if (PARAM.globalv.double_grid || PARAM.globalv.use_uspp) { - this->charge->rhopw->recip2real(&rhog[is * this->charge->rhopw->npw], this->rho[is]); + for (int is = 0; is < PARAM.inp.nspin; is++) + { + this->charge->rhopw->recip2real(this->rhog[is], this->rho[is]); + } } - - delmem_complex_op()(this->ctx, rhog); } template -void ElecStatePW::addusdens_g(const Real* becsum, T* rhog) +void ElecStatePW::addusdens_g(const Real* becsum, T** rhog) { const T one{1, 0}; const T zero{0, 0}; @@ -506,7 +542,7 @@ void ElecStatePW::addusdens_g(const Real* becsum, T* rhog) this->ppcell->radial_fft_q(this->ctx, npw, ih, jh, it, qmod, ylmk0, qgm); for (int ig = 0; ig < npw; ig++) { - rhog[is * npw + ig] += qgm[ig] * aux2[ijh * npw + ig]; + rhog[is][ig] += qgm[ig] * aux2[ijh * npw + ig]; } ijh++; } diff --git a/source/module_elecstate/elecstate_pw.h b/source/module_elecstate/elecstate_pw.h index dc992fa37a..8259d83024 100644 --- a/source/module_elecstate/elecstate_pw.h +++ b/source/module_elecstate/elecstate_pw.h @@ -42,8 +42,9 @@ class ElecStatePW : public ElecState //! init rho_data and kin_r_data void init_rho_data(); - Real** rho = nullptr; - Real** kin_r = nullptr; //[Device] [spin][nrxx] rho and kin_r + Real** rho = nullptr; // [Device] [spin][nrxx] rho + T** rhog = nullptr; // [Device] [spin][nrxx] rhog + Real** kin_r = nullptr; // [Device] [spin][nrxx] kin_r protected: @@ -70,7 +71,7 @@ class ElecStatePW : public ElecState //! Non-local pseudopotentials //! \sum_lm Q_lm(r) \sum_i w_i - void addusdens_g(const Real* becsum, T* rhog); + void addusdens_g(const Real* becsum, T** rhog); Device * ctx = {}; @@ -78,7 +79,8 @@ class ElecStatePW : public ElecState mutable T* vkb = nullptr; - Real* rho_data = nullptr; + Real* rho_data = nullptr; + T* rhog_data = nullptr; Real* kin_r_data = nullptr; T* wfcr = nullptr; T* wfcr_another_spin = nullptr; diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index b1307b8523..66e255d4b8 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -1542,6 +1542,8 @@ void pseudopot_cell_vnl::newq(const ModuleBase::matrix& veff, const ModulePW::PW fact = 2.0; } + deeq.zero_out(); + const int npw = rho_basis->npw; ModuleBase::matrix ylmk0(lmaxq * lmaxq, npw); ModuleBase::YlmReal::Ylm_Real(lmaxq * lmaxq, npw, rho_basis->gcar, ylmk0); diff --git a/tests/integrate/101_PW_upf201_Al_pseudopots/INPUT b/tests/integrate/101_PW_upf201_Al_pseudopots/INPUT index 8a3a962f7a..b6caf45d0b 100644 --- a/tests/integrate/101_PW_upf201_Al_pseudopots/INPUT +++ b/tests/integrate/101_PW_upf201_Al_pseudopots/INPUT @@ -9,6 +9,7 @@ pseudo_dir ../../PP_ORB #Parameters (2.Iteration) ecutwfc 30 +ecutrho 160 scf_thr 1e-9 scf_nmax 100 diff --git a/tests/integrate/101_PW_upf201_Al_pseudopots/README b/tests/integrate/101_PW_upf201_Al_pseudopots/README index eb66dbf457..eb167f8f42 100644 --- a/tests/integrate/101_PW_upf201_Al_pseudopots/README +++ b/tests/integrate/101_PW_upf201_Al_pseudopots/README @@ -3,3 +3,5 @@ This test for: *upf201 pseudopotential *mixing_type broyden-kerker *mixing_beta 0.7 +*ecutwfc 30 +*ecutrho 160 diff --git a/tests/integrate/101_PW_upf201_Al_pseudopots/jd b/tests/integrate/101_PW_upf201_Al_pseudopots/jd index eb37ed4384..8d19624a92 100644 --- a/tests/integrate/101_PW_upf201_Al_pseudopots/jd +++ b/tests/integrate/101_PW_upf201_Al_pseudopots/jd @@ -1 +1 @@ -test upf201 pseudopotential, symmetry=on +test upf201 pseudopotential with ecutrho/ecutwfc > 4, symmetry=on diff --git a/tests/integrate/101_PW_upf201_Al_pseudopots/result.ref b/tests/integrate/101_PW_upf201_Al_pseudopots/result.ref index 1d10b82f85..f10e1e901b 100644 --- a/tests/integrate/101_PW_upf201_Al_pseudopots/result.ref +++ b/tests/integrate/101_PW_upf201_Al_pseudopots/result.ref @@ -1,6 +1,6 @@ -etotref -57.66876692615671 -etotperatomref -57.6687669262 +etotref -57.66876684738178 +etotperatomref -57.6687668474 pointgroupref O_h spacegroupref O_h nksibzref 3 -totaltimeref +totaltimeref 0.29 diff --git a/tests/integrate/101_PW_upf201_uspp_ncpp/INPUT b/tests/integrate/101_PW_upf201_uspp_ncpp/INPUT new file mode 100644 index 0000000000..5264007cf5 --- /dev/null +++ b/tests/integrate/101_PW_upf201_uspp_ncpp/INPUT @@ -0,0 +1,30 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf +symmetry 0 +latname fcc +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 20 +ecutrho 160 +scf_thr 1e-9 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type pw + +#Parameters (4.Smearing) +smearing_method gaussian +smearing_sigma 0.002 + +#parameters (5.Mixing) +mixing_type pulay +mixing_beta 0.4 + +pseudo_mesh 1 +pseudo_rcut 10 + +cal_force 1 +cal_stress 1 diff --git a/tests/integrate/101_PW_upf201_uspp_ncpp/KPT b/tests/integrate/101_PW_upf201_uspp_ncpp/KPT new file mode 100644 index 0000000000..f5f7f4ec34 --- /dev/null +++ b/tests/integrate/101_PW_upf201_uspp_ncpp/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +2 2 2 0 0 0 diff --git a/tests/integrate/101_PW_upf201_uspp_ncpp/STRU b/tests/integrate/101_PW_upf201_uspp_ncpp/STRU new file mode 100644 index 0000000000..495f37f126 --- /dev/null +++ b/tests/integrate/101_PW_upf201_uspp_ncpp/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf +Cl 35.453 Cl.pbe-nl-rrkjus_psl.1.0.0.UPF + +LATTICE_CONSTANT +7 + +ATOMIC_POSITIONS +Direct + +H #label +0 #magnetism +1 #number of atoms +0.00 0.00 0.00 + +Cl #label +0 #magnetism +1 #number of atoms +0.51 0.00 0.00 diff --git a/tests/integrate/101_PW_upf201_uspp_ncpp/jd b/tests/integrate/101_PW_upf201_uspp_ncpp/jd new file mode 100644 index 0000000000..d049618ee2 --- /dev/null +++ b/tests/integrate/101_PW_upf201_uspp_ncpp/jd @@ -0,0 +1 @@ +test combination of upf201 uspp and ncpp, HCl nspin=1, symmetry=off diff --git a/tests/integrate/101_PW_upf201_uspp_ncpp/result.ref b/tests/integrate/101_PW_upf201_uspp_ncpp/result.ref new file mode 100644 index 0000000000..106e20bba3 --- /dev/null +++ b/tests/integrate/101_PW_upf201_uspp_ncpp/result.ref @@ -0,0 +1,5 @@ +etotref -426.8936030043360006 +etotperatomref -213.4468015022 +totalforceref 0.623688 +totalstressref 8822.968757 +totaltimeref 0.97 diff --git a/tests/integrate/CASES_CPU.txt b/tests/integrate/CASES_CPU.txt index 1c76104ae0..6e6093523a 100644 --- a/tests/integrate/CASES_CPU.txt +++ b/tests/integrate/CASES_CPU.txt @@ -18,6 +18,7 @@ 101_PW_upf201_upf100_pseudopots 101_PW_upf201_uspp_Fe 101_PW_upf201_uspp_NaCl +101_PW_upf201_uspp_ncpp 101_PW_VW_pseudopots 101_PW_Coulomb 101_PW_GTH_CF_CS_Si