From d73b1d2b51cb59f09a90ece3b1d64dc96ec47f78 Mon Sep 17 00:00:00 2001 From: Erjie Wu <110683255+ErjieWu@users.noreply.github.com> Date: Thu, 12 Dec 2024 08:55:37 +0800 Subject: [PATCH] Refactor: Combine gamma-only and multi-k versions of some functions in DeePKS. (#5717) * Add support for INPUT deepks_v_delta>0 in multi-k points DeePKS calculations * Refactor: Change LCAO_Deepks_Interface to template class. * Remove the h_mat and h_mat_k variables in LCAO_Deepks and change H_V_delta to form consistent with H_V_delta_k. * Change functions in deepks_hmat to template. * Combine gamma-only and multi-k for v_delta_precalc. * Change functions about v_delta_precalc and psialpha in deepks_v_delta calculations to templates. * Change save_npy_h to template. * Change some functions in LCAO_deepks_io to templates. * Remove ld.V_deltaR. * Change cal_orbital_precalc to template. * Remove orbital_precalc_k.cpp. * Change cal_gdmx into template function. * [pre-commit.ci lite] apply automatic fixes * Update LCAO_deepks_interface.cpp * Update FORCE_STRESS.cpp * Update FORCE_gamma.cpp * Update deepks_lcao.cpp * Update LCAO_deepks.cpp * Update LCAO_deepks.cpp * Update LCAO_deepks.h --------- Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> --- source/Makefile.Objects | 3 - source/module_esolver/esolver_ks_lcao.cpp | 50 +- .../hamilt_lcaodft/FORCE_STRESS.cpp | 16 +- .../hamilt_lcaodft/FORCE_gamma.cpp | 5 +- .../operator_lcao/deepks_lcao.cpp | 6 +- .../module_deepks/CMakeLists.txt | 3 - .../module_deepks/LCAO_deepks.cpp | 45 +- .../module_deepks/LCAO_deepks.h | 87 +--- .../module_deepks/LCAO_deepks_interface.cpp | 460 ++++++------------ .../module_deepks/LCAO_deepks_interface.h | 23 +- .../module_deepks/LCAO_deepks_io.cpp | 226 ++++----- .../module_deepks/LCAO_deepks_io.h | 95 ++-- .../module_deepks/LCAO_deepks_odelta.cpp | 6 +- .../module_deepks/LCAO_deepks_pdm.cpp | 5 +- .../module_deepks/LCAO_deepks_torch.cpp | 213 ++++---- .../module_deepks/LCAO_deepks_vdelta.cpp | 6 +- .../module_deepks/cal_gdmx.cpp | 97 +++- .../module_deepks/cal_gdmx_k.cpp | 249 ---------- .../module_deepks/deepks_hmat.cpp | 144 ++---- .../module_deepks/deepks_hmat.h | 32 +- .../module_deepks/orbital_precalc.cpp | 290 ++++++++--- .../module_deepks/orbital_precalc_k.cpp | 350 ------------- .../module_deepks/test/LCAO_deepks_test.cpp | 4 +- .../module_deepks/v_delta_precalc.cpp | 185 +++++-- .../module_deepks/v_delta_precalc_k.cpp | 234 --------- 25 files changed, 946 insertions(+), 1888 deletions(-) delete mode 100644 source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp delete mode 100644 source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp delete mode 100644 source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 661db25611..ae07f24df8 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -201,14 +201,11 @@ OBJS_DEEPKS=LCAO_deepks.o\ deepks_hmat.o\ LCAO_deepks_interface.o\ orbital_precalc.o\ - orbital_precalc_k.o\ cal_gdmx.o\ - cal_gdmx_k.o\ cal_gedm.o\ cal_gvx.o\ cal_descriptor.o\ v_delta_precalc.o\ - v_delta_precalc_k.o\ OBJS_ELECSTAT=elecstate.o\ diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index e515c23903..3d089e568e 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -961,7 +961,7 @@ void ESolver_KS_LCAO::after_scf(UnitCell& ucell, const int istep) // 6) write Hamiltonian and Overlap matrix for (int ik = 0; ik < this->kv.get_nks(); ++ik) { - if (PARAM.inp.out_mat_hs[0] || PARAM.inp.deepks_v_delta) + if (PARAM.inp.out_mat_hs[0]) { this->p_hamilt->updateHk(ik); } @@ -1000,12 +1000,6 @@ void ESolver_KS_LCAO::after_scf(UnitCell& ucell, const int istep) this->pv, GlobalV::DRANK); } -#ifdef __DEEPKS - if (PARAM.inp.deepks_out_labels && PARAM.inp.deepks_v_delta) - { - DeePKS_domain::save_h_mat(h_mat.p, this->pv.nloc, ik); - } -#endif } } @@ -1023,24 +1017,30 @@ void ESolver_KS_LCAO::after_scf(UnitCell& ucell, const int istep) //! 8) Write DeePKS information #ifdef __DEEPKS - std::shared_ptr ld_shared_ptr(&GlobalC::ld, [](LCAO_Deepks*) {}); - LCAO_Deepks_Interface LDI = LCAO_Deepks_Interface(ld_shared_ptr); - ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels"); - LDI.out_deepks_labels(this->pelec->f_en.etot, - this->pelec->klist->get_nks(), - ucell.nat, - PARAM.globalv.nlocal, - this->pelec->ekb, - this->pelec->klist->kvec_d, - ucell, - orb_, - GlobalC::GridD, - &(this->pv), - *(this->psi), - dynamic_cast*>(this->pelec)->get_DM(), - PARAM.inp.deepks_v_delta); - - ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels"); + if (this->psi != nullptr && (istep % PARAM.inp.out_interval == 0)) + { + hamilt::HamiltLCAO* p_ham_deepks + = dynamic_cast*>(this->p_hamilt); + std::shared_ptr ld_shared_ptr(&GlobalC::ld, [](LCAO_Deepks*) {}); + LCAO_Deepks_Interface LDI(ld_shared_ptr); + + ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels"); + LDI.out_deepks_labels(this->pelec->f_en.etot, + this->pelec->klist->get_nks(), + ucell.nat, + PARAM.globalv.nlocal, + this->pelec->ekb, + this->pelec->klist->kvec_d, + ucell, + orb_, + GlobalC::GridD, + &(this->pv), + *(this->psi), + dynamic_cast*>(this->pelec)->get_DM(), + p_ham_deepks); + + ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels"); + } #endif //! 9) Perform RDMFT calculations diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp index 43a633daf2..4a262da0e4 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp @@ -522,7 +522,7 @@ void Force_Stress_LCAO::getForceStress(const bool isforce, { const std::vector>& dm_gamma = dynamic_cast*>(pelec)->get_DM()->get_DMK_vector(); - GlobalC::ld.cal_gdmx(dm_gamma[0], ucell, orb, GlobalC::GridD, isstress); + GlobalC::ld.cal_gdmx(dm_gamma, ucell, orb, GlobalC::GridD, kv.get_nks(), kv.kvec_d, isstress); } else { @@ -531,13 +531,13 @@ void Force_Stress_LCAO::getForceStress(const bool isforce, ->get_DM() ->get_DMK_vector(); - GlobalC::ld.cal_gdmx_k(dm_k, - ucell, - orb, - GlobalC::GridD, - kv.get_nks(), - kv.kvec_d, - isstress); + GlobalC::ld.cal_gdmx(dm_k, + ucell, + orb, + GlobalC::GridD, + kv.get_nks(), + kv.kvec_d, + isstress); } if (PARAM.inp.deepks_out_unittest) { diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp index c7aad83123..38e5be5051 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp @@ -260,7 +260,8 @@ void Force_LCAO::ftable(const bool isforce, if (PARAM.inp.deepks_out_unittest) { - LCAO_deepks_io::print_dm(dm_gamma[0], PARAM.globalv.nlocal, this->ParaV->nrow); + const int nks = 1; // 1 for gamma-only + LCAO_deepks_io::print_dm(nks, PARAM.globalv.nlocal, this->ParaV->nrow, dm_gamma); GlobalC::ld.check_projected_dm(); @@ -268,7 +269,7 @@ void Force_LCAO::ftable(const bool isforce, GlobalC::ld.check_gedm(); - GlobalC::ld.cal_e_delta_band(dm_gamma); + GlobalC::ld.cal_e_delta_band(dm_gamma,nks); std::ofstream ofs("E_delta_bands.dat"); ofs << std::setprecision(10) << GlobalC::ld.e_delta_band; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp index 95df523db2..47d2f57c9f 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp @@ -186,7 +186,7 @@ void DeePKS, double>>::contributeHR() { ModuleBase::timer::tick("DeePKS", "contributeHR"); - GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD); + GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD); GlobalC::ld.cal_descriptor(this->ucell->nat); // calculate dE/dD GlobalC::ld.cal_gedm(this->ucell->nat); @@ -219,7 +219,7 @@ void DeePKS, std::complex>>::contribut { ModuleBase::timer::tick("DeePKS", "contributeHR"); - GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD); + GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD); GlobalC::ld.cal_descriptor(this->ucell->nat); // calculate dE/dD GlobalC::ld.cal_gedm(this->ucell->nat); @@ -497,7 +497,7 @@ void hamilt::DeePKS>::cal_HR_IJR(const double* hr_i inline void get_h_delta_k(int ik, double*& h_delta_k) { - h_delta_k = GlobalC::ld.H_V_delta.data(); + h_delta_k = GlobalC::ld.H_V_delta[ik].data(); return; } inline void get_h_delta_k(int ik, std::complex*& h_delta_k) diff --git a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt index e7e5110c6f..f32151bafe 100644 --- a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt +++ b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt @@ -13,14 +13,11 @@ if(ENABLE_DEEPKS) deepks_hmat.cpp LCAO_deepks_interface.cpp orbital_precalc.cpp - orbital_precalc_k.cpp cal_gdmx.cpp - cal_gdmx_k.cpp cal_gedm.cpp cal_gvx.cpp cal_descriptor.cpp v_delta_precalc.cpp - v_delta_precalc_k.cpp ) add_library( diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp index 84eb2d6e6b..1676838282 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp @@ -17,7 +17,6 @@ //4. subroutines that are related to V_delta: // - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls // init_gdmx, as well as allocating F_delta -// - allocate_V_deltaR : allcoates H_V_deltaR, for multi-k calculations #ifdef __DEEPKS @@ -35,7 +34,6 @@ LCAO_Deepks::LCAO_Deepks() alpha_index = new ModuleBase::IntArray[1]; inl_index = new ModuleBase::IntArray[1]; inl_l = nullptr; - H_V_deltaR = nullptr; gedm = nullptr; } @@ -45,7 +43,6 @@ LCAO_Deepks::~LCAO_Deepks() delete[] alpha_index; delete[] inl_index; delete[] inl_l; - delete[] H_V_deltaR; //=======1. to use deepks, pdm is required========== //delete pdm** @@ -92,7 +89,10 @@ void LCAO_Deepks::init( int tot_inl = tot_inl_per_atom * nat; - if(PARAM.inp.deepks_equiv) tot_inl = nat; + if(PARAM.inp.deepks_equiv) + { + tot_inl = nat; + } this->lmaxd = lm; this->nmaxd = nm; @@ -143,25 +143,6 @@ void LCAO_Deepks::init( this->pv = &pv_in; - if(PARAM.inp.deepks_v_delta) - { - //allocate and init h_mat - if(PARAM.globalv.gamma_only_local) - { - int nloc=this->pv->nloc; - this->h_mat.resize(nloc,0.0); - } - else - { - int nloc=this->pv->nloc; - this->h_mat_k.resize(nks); - for (int ik = 0; ik < nks; ik++) - { - this->h_mat_k[ik].resize(nloc,std::complex(0.0,0.0)); - } - } - } - return; } @@ -335,8 +316,9 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks) //initialize the H matrix H_V_delta if(PARAM.globalv.gamma_only_local) { - this->H_V_delta.resize(pv->nloc); - ModuleBase::GlobalFunc::ZEROS(this->H_V_delta.data(), pv->nloc); + H_V_delta.resize(1); // the first dimension is for the consistence with H_V_delta_k + this->H_V_delta[0].resize(pv->nloc); + ModuleBase::GlobalFunc::ZEROS(this->H_V_delta[0].data(), pv->nloc); } else { @@ -387,15 +369,6 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks) return; } -void LCAO_Deepks::allocate_V_deltaR(const int nnr) -{ - ModuleBase::TITLE("LCAO_Deepks", "allocate_V_deltaR"); - GlobalV::ofs_running << nnr << std::endl; - delete[] H_V_deltaR; - H_V_deltaR = new double[nnr]; - ModuleBase::GlobalFunc::ZEROS(H_V_deltaR, nnr); -} - void LCAO_Deepks::init_orbital_pdm_shell(const int nks) { @@ -541,12 +514,12 @@ void LCAO_Deepks::del_v_delta_pdm_shell(const int nks,const int nlocal) void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector>& dm, const int nks) { - this->cal_e_delta_band(dm); + this->cal_e_delta_band(dm, nks); } void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector>>& dm, const int nks) { - this->cal_e_delta_band_k(dm, nks); + this->cal_e_delta_band(dm, nks); } #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h index 57b3609059..51d765f1b2 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h @@ -54,18 +54,10 @@ class LCAO_Deepks ///\rho_{HL} = c_{L, \mu}c_{L,\nu} - c_{H, \mu}c_{H,\nu} \f$ (for gamma_only) ModuleBase::matrix o_delta; - ///(Unit: Ry) Hamiltonian matrix in k space - /// for gamma only - std::vector h_mat; - /// for multi-k - std::vector>> h_mat_k; - /// Correction term to the Hamiltonian matrix: \f$\langle\psi|V_\delta|\psi\rangle\f$ (for gamma only) - std::vector H_V_delta; + /// The size of first dimension is 1, which is used for the consitence with H_V_delta_k + std::vector> H_V_delta; /// Correction term to Hamiltonian, for multi-k - /// In R space: - double* H_V_deltaR; - /// In k space: std::vector>> H_V_delta_k; // F_delta will be deleted soon, mohan 2024-07-25 @@ -213,7 +205,6 @@ class LCAO_Deepks // 3. subroutines that are related to V_delta: // - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls // init_gdmx, as well as allocating F_delta - // - allocate_V_deltaR : allcoates H_V_deltaR, for multi-k calculations public: explicit LCAO_Deepks(); @@ -231,8 +222,6 @@ class LCAO_Deepks /// Allocate memory for correction to Hamiltonian void allocate_V_delta(const int nat, const int nks = 1); - void allocate_V_deltaR(const int nnr); - // array for storing gdmx, used for calculating gvx void init_gdmx(const int nat); // void del_gdmx(const int nat); @@ -299,8 +288,7 @@ class LCAO_Deepks // 3. check_projected_dm, which prints pdm to descriptor.dat // 4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point - // 5. cal_gdmx_k, counterpart of 3, for multi-k - // 6. check_gdmx, which prints gdmx to a series of .dat files + // 5. check_gdmx, which prints gdmx to a series of .dat files public: /** @@ -316,7 +304,7 @@ class LCAO_Deepks const LCAO_Orbitals& orb, Grid_Driver& GridD); - void cal_projected_DM_k(const elecstate::DensityMatrix, double>* dm, + void cal_projected_DM(const elecstate::DensityMatrix, double>* dm, const UnitCell& ucell, const LCAO_Orbitals& orb, Grid_Driver& GridD); @@ -335,21 +323,16 @@ class LCAO_Deepks // calculate the gradient of pdm with regard to atomic positions // d/dX D_{Inl,mm'} + template void cal_gdmx( // const ModuleBase::matrix& dm, - const std::vector& dm, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - Grid_Driver& GridD, - const bool isstress); - - void cal_gdmx_k( // const std::vector& dm, - const std::vector>>& dm, + const std::vector>& dm, const UnitCell& ucell, const LCAO_Orbitals& orb, Grid_Driver& GridD, const int nks, const std::vector>& kvec_d, const bool isstress); + void check_gdmx(const int nat); /** @@ -381,10 +364,10 @@ class LCAO_Deepks public: /// calculate tr(\rho V_delta) // void cal_e_delta_band(const std::vector& dm/**<[in] density matrix*/); - void cal_e_delta_band(const std::vector>& dm /**<[in] density matrix*/); + void cal_e_delta_band(const std::vector>& dm /**<[in] density matrix*/, const int /*nks*/); // void cal_e_delta_band_k(const std::vector& dm/**<[in] density matrix*/, // const int nks); - void cal_e_delta_band_k(const std::vector>>& dm /**<[in] density matrix*/, + void cal_e_delta_band(const std::vector>>& dm /**<[in] density matrix*/, const int nks); //! a temporary interface for cal_e_delta_band and cal_e_delta_band_k @@ -404,8 +387,9 @@ class LCAO_Deepks public: void cal_o_delta(const std::vector>& - dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/); - void cal_o_delta_k(const std::vector>& + dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/, + const int nks); + void cal_o_delta(const std::vector>& dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/, const int nks); @@ -439,18 +423,12 @@ class LCAO_Deepks // 10. cal_orbital_precalc : orbital_precalc is usted for training with orbital label, // which equals gvdm * orbital_pdm_shell, // orbital_pdm_shell[1,Inl,nm*nm] = dm_hl * overlap * overlap - // 11. cal_orbital_precalc_k : orbital_precalc is usted for training with orbital label, - // for multi-k case, which equals gvdm * orbital_pdm_shell, - // orbital_pdm_shell[1,Inl,nm*nm] = dm_hl_k * overlap * overlap - // 12. cal_v_delta_precalc : v_delta_precalc is used for training with v_delta label, + // 11. cal_v_delta_precalc : v_delta_precalc is used for training with v_delta label, // which equals gvdm * v_delta_pdm_shell, // v_delta_pdm_shell = overlap * overlap - // 13. cal_v_delta_precalc_k : v_delta_precalc is used for training with v_delta label, - // for multi-k case, which equals ??? - // ??? - // 14. check_v_delta_precalc : check v_delta_precalc - // 15. prepare_psialpha : prepare psialpha for outputting npy file - // 16. prepare_gevdm : prepare gevdm for outputting npy file + // 12. check_v_delta_precalc : check v_delta_precalc + // 13. prepare_psialpha : prepare psialpha for outputting npy file + // 14. prepare_gevdm : prepare gevdm for outputting npy file public: /// Calculates descriptors @@ -485,30 +463,18 @@ class LCAO_Deepks void cal_gedm_equiv(const int nat); // calculates orbital_precalc - void cal_orbital_precalc(const std::vector>& dm_hl /**<[in] density matrix*/, + template + void cal_orbital_precalc(const std::vector>& dm_hl /**<[in] density matrix*/, const int nat, + const int nks, + const std::vector>& kvec_d, const UnitCell& ucell, const LCAO_Orbitals& orb, Grid_Driver& GridD); - // calculates orbital_precalc for multi-k case - void cal_orbital_precalc_k( - const std::vector>& dm_hl /**<[in] density matrix*/, - const int nat, - const int nks, - const std::vector>& kvec_d, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - Grid_Driver& GridD); - //calculates v_delta_precalc + template void cal_v_delta_precalc(const int nlocal, - const int nat, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - Grid_Driver &GridD); - - void cal_v_delta_precalc_k(const int nlocal, const int nat, const int nks, const std::vector> &kvec_d, @@ -516,21 +482,20 @@ class LCAO_Deepks const LCAO_Orbitals &orb, Grid_Driver &GridD); - void check_v_delta_precalc(const int nat, const int nks,const int nlocal); + template + void check_v_delta_precalc(const int nat, const int nks, const int nlocal); // prepare psialpha for outputting npy file + template void prepare_psialpha(const int nlocal, - const int nat, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - Grid_Driver &GridD); - void prepare_psialpha_k(const int nlocal, const int nat, const int nks, const std::vector> &kvec_d, const UnitCell &ucell, const LCAO_Orbitals &orb, Grid_Driver &GridD); + + template void check_vdp_psialpha(const int nat, const int nks, const int nlocal); // prepare gevdm for outputting npy file diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp index 52d3811692..266cac73ee 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp @@ -6,12 +6,15 @@ #include "module_base/global_variable.h" #include "module_base/tool_title.h" #include "module_elecstate/cal_dm.h" +#include "module_hamilt_lcao/module_hcontainer/hcontainer.h" -LCAO_Deepks_Interface::LCAO_Deepks_Interface(std::shared_ptr ld_in) : ld(ld_in) +template +LCAO_Deepks_Interface::LCAO_Deepks_Interface(std::shared_ptr ld_in) : ld(ld_in) { } -// gamma-only -void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, + +template +void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, const int& nks, const int& nat, const int& nlocal, @@ -21,17 +24,20 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, const LCAO_Orbitals& orb, Grid_Driver& GridD, const Parallel_Orbitals* ParaV, - const psi::Psi& psid, - const elecstate::DensityMatrix* dm, - const int& deepks_v_delta) + const psi::Psi& psi, + const elecstate::DensityMatrix* dm, + hamilt::HamiltLCAO* p_ham) { ModuleBase::TITLE("LCAO_Deepks_Interface", "out_deepks_labels"); + ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels"); + + // define TH for different types + using TH = std::conditional_t::value, ModuleBase::matrix, ModuleBase::ComplexMatrix>; const int my_rank = GlobalV::MY_RANK; const int nspin = PARAM.inp.nspin; - // calculating deepks correction to bandgap - // and save the results + // calculating deepks correction to bandgap and save the results if (PARAM.inp.deepks_out_labels) { // mohan updated 2024-07-25 @@ -72,118 +78,163 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, if (PARAM.inp.deepks_scf) { ModuleBase::matrix wg_hl; - wg_hl.create(nspin, PARAM.inp.nbands); + std::vector> dm_bandgap; - std::vector> dm_bandgap_gamma; + // Calculate O_delta + if constexpr (std::is_same::value) // for gamma only + { + wg_hl.create(nspin, PARAM.inp.nbands); + dm_bandgap.resize(nspin); - dm_bandgap_gamma.resize(nspin); - for (int is = 0; is < nspin; ++is) + for (int is = 0; is < nspin; ++is) + { + for (int ib = 0; ib < 1; ++ib) + { + wg_hl.zero_out(); + wg_hl(is, ib + nocc - 1) = -1.0; + wg_hl(is, ib + nocc) = 1.0; + dm_bandgap[ib].resize(nspin); + elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap[ib]); + } + } + } + else // for multi-k { - for (int ib = 0; ib < 1; ++ib) + wg_hl.create(nks, PARAM.inp.nbands); + dm_bandgap.resize(1); + + for (int ib = 0; ib < 1; ib++) { wg_hl.zero_out(); - wg_hl(is, ib + nocc - 1) = -1.0; - wg_hl(is, ib + nocc) = 1.0; - dm_bandgap_gamma[ib].resize(nspin); - elecstate::cal_dm(ParaV, wg_hl, psid, dm_bandgap_gamma[ib]); + for (int ik = 0; ik < nks; ik++) + { + wg_hl(ik, ib + nocc - 1) = -1.0; + wg_hl(ik, ib + nocc) = 1.0; + } + dm_bandgap[ib].resize(nks); + elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap[ib]); } } + + ld->cal_orbital_precalc(dm_bandgap, nat, nks, kvec_d, ucell, orb, GridD); + ld->cal_o_delta(dm_bandgap, nks); - ld->cal_orbital_precalc(dm_bandgap_gamma, nat, ucell, orb, GridD); - + // save obase and orbital_precalc LCAO_deepks_io::save_npy_orbital_precalc(nat, - nks, - ld->des_per_atom, - ld->orbital_precalc_tensor, - PARAM.globalv.global_out_dir, - my_rank); - - ld->cal_o_delta(dm_bandgap_gamma); - + nks, + ld->des_per_atom, + ld->orbital_precalc_tensor, + PARAM.globalv.global_out_dir, + my_rank); const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy"; - LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank); + LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank); } // end deepks_scf == 1 else // deepks_scf == 0 { const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy"; LCAO_deepks_io::save_npy_o(deepks_bands, file_obase, nks, my_rank); // no scf, o_tot=o_base - } // end deepks_scf == 0 + } // end deepks_scf == 0 } // end bandgap label - if(deepks_v_delta)//gamma only now + // save H(R) matrix + if (true) // should be modified later! + { + const std::string file_hr = PARAM.globalv.global_out_dir + "deepks_hr.npy"; + const hamilt::HContainer& hR = *(p_ham->getHR()); + + // How to save H(R)? + } + + if(PARAM.inp.deepks_v_delta) { - ModuleBase::matrix h_tot; - h_tot.create(nlocal,nlocal); + std::vector h_tot(nks); + std::vector> h_mat(nks, std::vector(ParaV->nloc)); + for (int ik = 0; ik < nks; ik++) + { + h_tot[ik].create(nlocal, nlocal); + p_ham->updateHk(ik); + const TK* hk_ptr = p_ham->getHk(); + for (int i = 0; i < ParaV->nloc; i++) + { + h_mat[ik][i] = hk_ptr[i]; + } + } - DeePKS_domain::collect_h_mat(*ParaV, ld->h_mat,h_tot,nlocal); + DeePKS_domain::collect_h_mat(*ParaV, h_mat, h_tot, nlocal, nks); const std::string file_htot = PARAM.globalv.global_out_dir + "deepks_htot.npy"; - LCAO_deepks_io::save_npy_h(h_tot, file_htot, nlocal, my_rank); + LCAO_deepks_io::save_npy_h(h_tot, file_htot, nlocal, nks, my_rank); if(PARAM.inp.deepks_scf) { - ModuleBase::matrix v_delta; - v_delta.create(nlocal,nlocal); - DeePKS_domain::collect_h_mat(*ParaV, ld->H_V_delta,v_delta,nlocal); + std::vector v_delta(nks); + std::vector h_base(nks); + for (int ik = 0; ik < nks; ik++) + { + v_delta[ik].create(nlocal, nlocal); + h_base[ik].create(nlocal, nlocal); + } + std::vector>* H_V_delta = nullptr; + if constexpr (std::is_same::value) + { + H_V_delta = &ld->H_V_delta; + } + else + { + H_V_delta = &ld->H_V_delta_k; + } + DeePKS_domain::collect_h_mat(*ParaV, *H_V_delta,v_delta,nlocal,nks); + // save v_delta and h_base const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy"; - LCAO_deepks_io::save_npy_h(h_tot-v_delta, file_hbase, nlocal, my_rank); + for (int ik = 0; ik < nks; ik++) + { + h_base[ik] = h_tot[ik] - v_delta[ik]; + } + LCAO_deepks_io::save_npy_h(h_base, file_hbase, nlocal, nks, my_rank); const std::string file_vdelta = PARAM.globalv.global_out_dir + "deepks_vdelta.npy"; - LCAO_deepks_io::save_npy_h(v_delta, file_vdelta, nlocal, my_rank); + LCAO_deepks_io::save_npy_h(v_delta, file_vdelta, nlocal, nks, my_rank); - if(deepks_v_delta==1)//v_delta_precalc storage method 1 + if(PARAM.inp.deepks_v_delta==1)//v_delta_precalc storage method 1 { - ld->cal_v_delta_precalc(nlocal, - nat, - ucell, - orb, - GridD); - - const int nks_gamma = 1; - LCAO_deepks_io::save_npy_v_delta_precalc( - nat, - nks_gamma, - nlocal, - ld->des_per_atom, - ld->v_delta_precalc_tensor, - PARAM.globalv.global_out_dir, - my_rank); + ld->cal_v_delta_precalc(nlocal, nat, nks, kvec_d, ucell, orb, GridD); + + LCAO_deepks_io::save_npy_v_delta_precalc(nat, + nks, + nlocal, + ld->des_per_atom, + ld->v_delta_precalc_tensor, + PARAM.globalv.global_out_dir, + my_rank); } - else if(deepks_v_delta==2)//v_delta_precalc storage method 2 + else if(PARAM.inp.deepks_v_delta==2)//v_delta_precalc storage method 2 { - ld->prepare_psialpha(nlocal, - nat, - ucell, - orb, - GridD); - - const int nks_gamma=1; - LCAO_deepks_io::save_npy_psialpha(nat, - nks_gamma, - nlocal, - ld->inlmax, - ld->lmaxd, - ld->psialpha_tensor, - PARAM.globalv.global_out_dir, - my_rank); - - ld->prepare_gevdm( - nat, - orb); + ld->prepare_psialpha(nlocal, nat, nks, kvec_d, ucell, orb, GridD); + + LCAO_deepks_io::save_npy_psialpha(nat, + nks, + nlocal, + ld->inlmax, + ld->lmaxd, + ld->psialpha_tensor, + PARAM.globalv.global_out_dir, + my_rank); + + ld->prepare_gevdm(nat, orb); LCAO_deepks_io::save_npy_gevdm(nat, - ld->inlmax, - ld->lmaxd, - ld->gevdm_tensor, - PARAM.globalv.global_out_dir, - my_rank); + ld->inlmax, + ld->lmaxd, + ld->gevdm_tensor, + PARAM.globalv.global_out_dir, + my_rank); } } else //deepks_scf == 0 { const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy"; - LCAO_deepks_io::save_npy_h(h_tot, file_hbase, nlocal, my_rank); + LCAO_deepks_io::save_npy_h(h_tot, file_hbase, nlocal, nks, my_rank); } }//end v_delta label @@ -194,6 +245,7 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, if (PARAM.inp.deepks_out_labels || PARAM.inp.deepks_scf) { // this part is for integrated test of deepks + // so it is printed no matter even if deepks_out_labels is not used // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm if(!PARAM.inp.deepks_scf) { @@ -216,258 +268,24 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, PARAM.inp.deepks_equiv, ld->d_tensor, PARAM.globalv.global_out_dir, - my_rank); // libnpy needed + GlobalV::MY_RANK); // libnpy needed } } /// print out deepks information to the screen if (PARAM.inp.deepks_scf) { - ld->cal_e_delta_band(dm->get_DMK_vector()); + ld->cal_e_delta_band(dm->get_DMK_vector(), nks); std::cout << "E_delta_band = " << std::setprecision(8) << ld->e_delta_band << " Ry" << " = " << std::setprecision(8) << ld->e_delta_band * ModuleBase::Ry_to_eV << " eV" << std::endl; - std::cout << "E_delta_NN= " << std::setprecision(8) << ld->E_delta << " Ry" + std::cout << "E_delta_NN = " << std::setprecision(8) << ld->E_delta << " Ry" << " = " << std::setprecision(8) << ld->E_delta * ModuleBase::Ry_to_eV << " eV" << std::endl; } } -// multi-k -void LCAO_Deepks_Interface::out_deepks_labels(const double& etot, - const int& nks, - const int& nat, - const int& nlocal, - const ModuleBase::matrix& ekb, - const std::vector>& kvec_d, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - Grid_Driver& GridD, - const Parallel_Orbitals* ParaV, - const psi::Psi>& psi, - const elecstate::DensityMatrix, double>* dm, - const int& deepks_v_delta) -{ - ModuleBase::TITLE("LCAO_Deepks_Interface", "out_deepks_labels"); - ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels"); - - const int my_rank = GlobalV::MY_RANK; - const int nspin = PARAM.inp.nspin; - - /// calculating deepks correction to bandgap and save the results - if (PARAM.inp.deepks_out_labels) - { - // mohan updated 2024-07-25 - const std::string file_etot = PARAM.globalv.global_out_dir + "deepks_etot.npy"; - const std::string file_ebase = PARAM.globalv.global_out_dir + "deepks_ebase.npy"; - - LCAO_deepks_io::save_npy_e(etot, file_etot, my_rank); - - if (PARAM.inp.deepks_scf) - { - /// ebase :no deepks E_delta including - LCAO_deepks_io::save_npy_e(etot - ld->E_delta, - file_ebase, my_rank); - } - else // deepks_scf = 0; base calculation - { - LCAO_deepks_io::save_npy_e(etot, file_ebase, my_rank); - } - - if (PARAM.inp.deepks_bandgap) - { - int nocc = PARAM.inp.nelec / 2; - ModuleBase::matrix deepks_bands; - deepks_bands.create(nks, 1); - for (int iks = 0; iks < nks; iks++) - { - for (int hl = 0; hl < 1; hl++) - { - deepks_bands(iks, hl) = ekb(iks, nocc + hl) - ekb(iks, nocc - 1 + hl); - } - } - - const std::string file_otot = PARAM.globalv.global_out_dir + "deepks_otot.npy"; - LCAO_deepks_io::save_npy_o(deepks_bands, file_otot, nks, my_rank); - - if (PARAM.inp.deepks_scf) - { - int nocc = PARAM.inp.nelec / 2; // redundant! - ModuleBase::matrix wg_hl; - wg_hl.create(nks, PARAM.inp.nbands); - std::vector> dm_bandgap_k; - dm_bandgap_k.resize(1); - - for (int ib = 0; ib < 1; ib++) - { - wg_hl.zero_out(); - for (int ik = 0; ik < nks; ik++) - { - wg_hl(ik, ib + nocc - 1) = -1.0; - wg_hl(ik, ib + nocc) = 1.0; - } - dm_bandgap_k[ib].resize(nks); - elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap_k[ib]); - } - - // ld->cal_o_delta_k(dm_bandgap_k, ParaV, nks); - ld->cal_orbital_precalc_k(dm_bandgap_k, nat, nks, kvec_d, ucell, orb, GridD); - - LCAO_deepks_io::save_npy_orbital_precalc( - nat, - nks, - ld->des_per_atom, - ld->orbital_precalc_tensor, - PARAM.globalv.global_out_dir, - GlobalV::MY_RANK); - - ld->cal_o_delta_k(dm_bandgap_k, nks); - - const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy"; - LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank); - } // end deepks_scf == 1 - else // deepks_scf == 0 - { - const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy"; - LCAO_deepks_io::save_npy_o(deepks_bands, file_obase, nks, my_rank); // no scf, o_tot=o_base - } // end deepks_scf == 0 - } // end bandgap label - if(deepks_v_delta) - { - std::vector h_tot(nks); - for (int ik = 0; ik < nks; ik++) - { - h_tot[ik].create(nlocal, nlocal); - } - - DeePKS_domain::collect_h_mat(*ParaV, ld->h_mat_k,h_tot,nlocal,nks); - - const std::string file_htot = PARAM.globalv.global_out_dir + "deepks_htot.npy"; - LCAO_deepks_io::save_npy_h(h_tot, file_htot, nlocal, nks, my_rank); - - if(PARAM.inp.deepks_scf) - { - std::vector v_delta(nks); - std::vector hbase(nks); - for (int ik = 0; ik < nks; ik++) - { - v_delta[ik].create(nlocal, nlocal); - hbase[ik].create(nlocal, nlocal); - } - DeePKS_domain::collect_h_mat(*ParaV, ld->H_V_delta_k,v_delta,nlocal,nks); - - const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy"; - for (int ik = 0; ik < nks; ik++) - { - hbase[ik] = h_tot[ik] - v_delta[ik]; - } - LCAO_deepks_io::save_npy_h(hbase, file_hbase, nlocal, nks, my_rank); - - const std::string file_vdelta = PARAM.globalv.global_out_dir + "deepks_vdelta.npy"; - LCAO_deepks_io::save_npy_h(v_delta, file_vdelta, nlocal, nks, my_rank); - - if(deepks_v_delta==1)//v_delta_precalc storage method 1 - { - ld->cal_v_delta_precalc_k(nlocal, - nat, - nks, - kvec_d, - ucell, - orb, - GridD); - - LCAO_deepks_io::save_npy_v_delta_precalc( - nat, - nks, - nlocal, - ld->des_per_atom, - ld->v_delta_precalc_tensor, - PARAM.globalv.global_out_dir, - my_rank); - - } - else if(deepks_v_delta==2)//v_delta_precalc storage method 2 - { - ld->prepare_psialpha_k(nlocal, - nat, - nks, - kvec_d, - ucell, - orb, - GridD); - - LCAO_deepks_io::save_npy_psialpha(nat, - nks, - nlocal, - ld->inlmax, - ld->lmaxd, - ld->psialpha_tensor, - PARAM.globalv.global_out_dir, - my_rank); - - ld->prepare_gevdm( - nat, - orb); - - LCAO_deepks_io::save_npy_gevdm(nat, - ld->inlmax, - ld->lmaxd, - ld->gevdm_tensor, - PARAM.globalv.global_out_dir, - my_rank); - } - } - else //deepks_scf == 0 - { - const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy"; - LCAO_deepks_io::save_npy_h(h_tot, file_hbase, nlocal, nks, my_rank); - } - } - } // end deepks_out_labels - - - // DeePKS PDM and descriptor - if (PARAM.inp.deepks_out_labels || PARAM.inp.deepks_scf) - { - // this part is for integrated test of deepks - // so it is printed no matter even if deepks_out_labels is not used - // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm - if(!PARAM.inp.deepks_scf) - { - ld->cal_projected_DM_k(dm, ucell, orb, GridD); - } - - ld->check_projected_dm(); // print out the projected dm for NSCF calculaiton - - ld->cal_descriptor(nat); // final descriptor - - ld->check_descriptor(ucell, PARAM.globalv.global_out_dir); - - if (PARAM.inp.deepks_out_labels) - { - LCAO_deepks_io::save_npy_d(nat, - ld->des_per_atom, - ld->inlmax, - ld->inl_l, - PARAM.inp.deepks_equiv, - ld->d_tensor, - PARAM.globalv.global_out_dir, - GlobalV::MY_RANK); // libnpy needed - } - } - // - if (PARAM.inp.deepks_scf) - { - ld->cal_e_delta_band_k(dm->get_DMK_vector(), nks); - - std::cout << "E_delta_band = " << std::setprecision(8) << ld->e_delta_band << " Ry" - << " = " << std::setprecision(8) << ld->e_delta_band * ModuleBase::Ry_to_eV << " eV" - << std::endl; - - std::cout << "E_delta_NN= " << std::setprecision(8) << ld->E_delta << " Ry" - << " = " << std::setprecision(8) << ld->E_delta * ModuleBase::Ry_to_eV << " eV" << std::endl; - } - - ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels"); -} +template class LCAO_Deepks_Interface; +template class LCAO_Deepks_Interface, double>; +template class LCAO_Deepks_Interface, std::complex>; #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h index cfb180ea19..1e657f48fe 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h @@ -5,8 +5,10 @@ #include "LCAO_deepks.h" #include "module_base/complexmatrix.h" #include "module_base/matrix.h" +#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h" #include +template class LCAO_Deepks_Interface { public: @@ -28,7 +30,6 @@ class LCAO_Deepks_Interface /// @param[in] psid /// @param[in] dm_gamma /// @param[in] dm_k - /// @param[in] deepks_v_delta // for Gamma-only void out_deepks_labels(const double& etot, const int& nks, @@ -40,23 +41,9 @@ class LCAO_Deepks_Interface const LCAO_Orbitals& orb, Grid_Driver& GridD, const Parallel_Orbitals* ParaV, - const psi::Psi& psid, - const elecstate::DensityMatrix* dm, - const int& deepks_v_delta); - // for multi-k - void out_deepks_labels(const double& etot, - const int& nks, - const int& nat, - const int& nlocal, - const ModuleBase::matrix& ekb, - const std::vector>& kvec_d, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - Grid_Driver& GridD, - const Parallel_Orbitals* ParaV, - const psi::Psi>& psi, - const elecstate::DensityMatrix, double>* dm, - const int& deepks_v_delta); + const psi::Psi& psid, + const elecstate::DensityMatrix* dm, + hamilt::HamiltLCAO* p_ham); private: std::shared_ptr ld; diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp index dd7d39227f..c1aa9cab0a 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp @@ -28,28 +28,11 @@ #include "LCAO_deepks_io.h" #include "npy.hpp" -void LCAO_deepks_io::print_dm(const std::vector &dm, - const int nlocal, - const int nrow) -{ - std::ofstream ofs("dm"); - ofs << std::setprecision(15); - - for (int mu=0; mu +void LCAO_deepks_io::print_dm(const int nks, const int nlocal, const int nrow, - const std::vector>>& dm) + const std::vector>& dm) { std::stringstream ss; for(int ik=0;ik(nks), - static_cast(nlocal), - static_cast(nlocal) }; - - std::vector npy_h; - for(int k=0; k &hamilt, +template +void LCAO_deepks_io::save_npy_h(const std::vector &hamilt, const std::string &h_file, const int nlocal, const int nks, @@ -461,7 +410,7 @@ void LCAO_deepks_io::save_npy_h(const std::vector &ha static_cast(nlocal), static_cast(nlocal) }; - std::vector> npy_h; + std::vector npy_h; for(int k=0; k &ha return; } +template void LCAO_deepks_io::save_npy_v_delta_precalc(const int nat, const int nks, const int nlocal, @@ -500,60 +450,39 @@ void LCAO_deepks_io::save_npy_v_delta_precalc(const int nat, static_cast(nlocal), static_cast(nat), static_cast(des_per_atom)}; - if (nks==1) + + std::vector npy_v_delta_precalc; + for (int iks = 0; iks < nks; ++iks) { - std::vector npy_v_delta_precalc; - for (int iks = 0; iks < nks; ++iks) + for (int mu = 0; mu < nlocal; ++mu) { - for (int mu = 0; mu < nlocal; ++mu) + for (int nu = 0; nu < nlocal; ++nu) { - for (int nu = 0; nu < nlocal; ++nu) + for (int iat = 0;iat < nat;++iat) { - for (int iat = 0;iat < nat;++iat) + for(int p=0; p::value) { npy_v_delta_precalc.push_back(v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble()); } - } - } - } - } - const std::string file_vdpre = out_dir + "deepks_vdpre.npy"; - npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc); - return; - } - else - { - std::vector> npy_v_delta_precalc; - for (int iks = 0; iks < nks; ++iks) - { - for (int mu = 0; mu < nlocal; ++mu) - { - for (int nu = 0; nu < nlocal; ++nu) - { - for (int iat = 0;iat < nat;++iat) - { - for(int p=0; p(); - auto imag_part = torch::imag(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item(); - std::complex value(real_part, imag_part); + std::complex value(torch::real(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item(), + torch::imag(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item()); npy_v_delta_precalc.push_back(value); } - } - } + } + } } } - const std::string file_vdpre = out_dir + "deepks_vdpre.npy"; - npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc); - return; } - - + const std::string file_vdpre = out_dir + "deepks_vdpre.npy"; + npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc); + return; } - +template void LCAO_deepks_io::save_npy_psialpha(const int nat, const int nks, const int nlocal, @@ -578,55 +507,35 @@ void LCAO_deepks_io::save_npy_psialpha(const int nat, static_cast(nks), static_cast(nlocal), static_cast(mmax)}; - if(nks==1) + std::vector npy_psialpha; + for(int iat=0; iat< nat ; iat++) { - std::vector npy_psialpha; - for(int iat=0; iat< nat ; iat++) + for(int nl = 0; nl < nlmax; nl++) { - for(int nl = 0; nl < nlmax; nl++) + for (int iks = 0; iks < nks ; iks++) { - for (int iks = 0; iks < nks ; iks++) + for(int mu = 0; mu < nlocal ; mu++) { - for(int mu = 0; mu < nlocal ; mu++) + for(int m=0; m< mmax; m++) { - for(int m=0; m< mmax; m++) + if constexpr (std::is_same::value) { npy_psialpha.push_back(psialpha_tensor.index({ iat,nl, iks, mu, m }).item().toDouble()); } - } - } - } - } - const std::string file_psialpha = out_dir + "deepks_psialpha.npy"; - npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha); - return; - } - else - { - std::vector> npy_psialpha; - for(int iat=0; iat< nat ; iat++) - { - for(int nl = 0; nl < nlmax; nl++) - { - for (int iks = 0; iks < nks ; iks++) - { - for(int mu = 0; mu < nlocal ; mu++) - { - for(int m=0; m< mmax; m++) + else { - std::complex value(torch::real(psialpha_tensor.index({ iat,nl, iks, mu, m })).item(), - torch::imag(psialpha_tensor.index({ iat,nl, iks, mu, m })).item()); + std::complex value(torch::real(psialpha_tensor.index({ iat, nl, iks, mu, m })).item(), + torch::imag(psialpha_tensor.index({ iat, nl, iks, mu, m })).item()); npy_psialpha.push_back(value); } - } - } + } + } } } - const std::string file_psialpha = out_dir + "deepks_psialpha.npy"; - npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha); - return; } - + const std::string file_psialpha = out_dir + "deepks_psialpha.npy"; + npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha); + return; } @@ -676,4 +585,61 @@ void LCAO_deepks_io::save_npy_gevdm(const int nat, return; } + +template void LCAO_deepks_io::print_dm(const int nks, + const int nlocal, + const int nrow, + const std::vector>& dm); + +template void LCAO_deepks_io::print_dm>(const int nks, + const int nlocal, + const int nrow, + const std::vector>>& dm); + +template void LCAO_deepks_io::save_npy_h(const std::vector &hamilt, + const std::string &h_file, + const int nlocal, + const int nks, + const int rank); + +template void LCAO_deepks_io::save_npy_h>(const std::vector &hamilt, + const std::string &h_file, + const int nlocal, + const int nks, + const int rank); + +template void LCAO_deepks_io::save_npy_v_delta_precalc(const int nat, + const int nks, + const int nlocal, + const int des_per_atom, + const torch::Tensor& v_delta_precalc_tensor, + const std::string& out_dir, + const int rank); + +template void LCAO_deepks_io::save_npy_v_delta_precalc>(const int nat, + const int nks, + const int nlocal, + const int des_per_atom, + const torch::Tensor& v_delta_precalc_tensor, + const std::string& out_dir, + const int rank); + +template void LCAO_deepks_io::save_npy_psialpha(const int nat, + const int nks, + const int nlocal, + const int inlmax, + const int lmaxd, + const torch::Tensor &psialpha_tensor, + const std::string& out_dir, + const int rank); + +template void LCAO_deepks_io::save_npy_psialpha>(const int nat, + const int nks, + const int nlocal, + const int inlmax, + const int lmaxd, + const torch::Tensor &psialpha_tensor, + const std::string& out_dir, + const int rank); + #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h index 8f88fa20d4..34398a21d7 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h @@ -20,33 +20,31 @@ namespace LCAO_deepks_io /// It also contains subroutines for printing density matrices /// which is used in unit tests - /// There are 2 subroutines for printing density matrices: - /// 1. print_dm : for gamma only - /// 2. print_dm_k : for multi-k + /// There are 2 subroutines for printing and loading .npy file: + /// 1. print_dm : print density matrices + /// 2. load_npy_gedm : load gedm from .npy file /// others print quantities in .npy format - /// 3. save_npy_d : descriptor ->dm_eig.npy - /// 4. save_npy_gvx : gvx ->grad_vx.npy - /// 5. save_npy_e : energy - /// 6. save_npy_f : force + /// 3. save_npy_d : descriptor -> deepks_dm_eig.npy + /// 4. save_npy_e : energy + /// 5. save_npy_f : force + /// 6. save_npy_gvx : gvx -> deepks_gradvx.npy /// 7. save_npy_s : stress - /// 8. save_npy_o: orbital - /// 9. save_npy_orbital_precalc: orbital_precalc -> orbital_precalc.npy - /// 10. save_npy_h : Hamiltonian - /// 11. save_npy_v_delta_precalc : v_delta_precalc - /// 12. save_npy_psialpha : psialpha - /// 13. save_npy_gevdm : grav_evdm , can use psialpha and gevdm to calculate v_delta_precalc + /// 8. save_npy_gvepsl : gvepsl -> deepks_gvepsl.npy + /// 9. save_npy_o: orbital + /// 10. save_npy_orbital_precalc: orbital_precalc -> deepks_orbpre.npy + /// 11. save_npy_h : Hamiltonian + /// 12. save_npy_v_delta_precalc : v_delta_precalc -> deepks_vdpre.npy + /// 13. save_npy_psialpha : psialpha -> deepks_psialpha.npy + /// 14. save_npy_gevdm : grav_evdm -> deepks_gevdm.npy, can use psialpha and gevdm to calculate v_delta_precalc /// print density matrices -void print_dm(const std::vector &dm, - const int nlocal, - const int nrow); - -void print_dm_k(const int nks, +template +void print_dm(const int nks, const int nlocal, const int nrow, - const std::vector>>& dm); + const std::vector>& dm); void load_npy_gedm(const int nat, const int des_per_atom, @@ -54,18 +52,7 @@ void load_npy_gedm(const int nat, double& e_delta, const int rank); - ///---------------------------------------------------------------------- - /// The following 4 functions save the `[dm_eig], [e_base], [f_base], [grad_vx]` - /// of current configuration as `.npy` file, when `deepks_scf = 1`. - /// After a full group of consfigurations are calculated, - /// we need a python script to `load` and `torch.cat` these `.npy` files, - /// and get `l_e_delta,npy` and `l_f_delta.npy` corresponding to the exact E, F data. - /// - /// Unit of energy: Ry - /// - /// Unit of force: Ry/Bohr - ///---------------------------------------------------------------------- - +/// save descriptor void save_npy_d(const int nat, const int des_per_atom, const int inlmax, @@ -75,33 +62,36 @@ void save_npy_d(const int nat, const std::string& out_dir, const int rank); -void save_npy_gvx(const int nat, - const int des_per_atom, - const torch::Tensor &gvx_tensor, - const std::string& out_dir, - const int rank); - -void save_npy_gvepsl(const int nat, - const int des_per_atom, - const torch::Tensor &gvepsl_tensor, - const std::string& out_dir, - const int rank); - +// save energy void save_npy_e(const double &e, /**<[in] \f$E_{base}\f$ or \f$E_{tot}\f$, in Ry*/ const std::string &e_file, const int rank); +// save force and gvx void save_npy_f(const ModuleBase::matrix &f, /**<[in] \f$F_{base}\f$ or \f$F_{tot}\f$, in Ry/Bohr*/ const std::string &f_file, const int nat, const int rank); +void save_npy_gvx(const int nat, + const int des_per_atom, + const torch::Tensor &gvx_tensor, + const std::string& out_dir, + const int rank); + +// save stress and gvepsl void save_npy_s(const ModuleBase::matrix &stress, /**<[in] \f$S_{base}\f$ or \f$S_{tot}\f$, in Ry/Bohr^3*/ const std::string &s_file, const double &omega, const int rank); -/// QO added on 2021-12-15 +void save_npy_gvepsl(const int nat, + const int des_per_atom, + const torch::Tensor &gvepsl_tensor, + const std::string& out_dir, + const int rank); + +/// save orbital and orbital_precalc void save_npy_o(const ModuleBase::matrix &bandgap, /**<[in] \f$E_{base}\f$ or \f$E_{tot}\f$, in Ry*/ const std::string &o_file, const int nks, @@ -114,20 +104,15 @@ void save_npy_orbital_precalc(const int nat, const std::string& out_dir, const int rank); -/// xinyuan added on 2023-2-20 -/// for gamma only -void save_npy_h(const ModuleBase::matrix &hamilt, +// save Hamiltonian and v_delta_precalc(for deepks_v_delta==1)/psialpha+gevdm(for deepks_v_delta==2) +template +void save_npy_h(const std::vector &hamilt, const std::string &h_file, const int nlocal, - const int rank); - -/// for multi-k -void save_npy_h(const std::vector &hamilt, - const std::string &h_file, - const int nlocal, const int nks, - const int rank); + const int rank); +template void save_npy_v_delta_precalc(const int nat, const int nks, const int nlocal, @@ -136,6 +121,7 @@ void save_npy_v_delta_precalc(const int nat, const std::string& out_dir, const int rank); +template void save_npy_psialpha(const int nat, const int nks, const int nlocal, @@ -145,6 +131,7 @@ void save_npy_psialpha(const int nat, const std::string& out_dir, const int rank); +// Always real, no need for template now void save_npy_gevdm(const int nat, const int inlmax, const int lmaxd, diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp index 7f9885d76f..eae3c12220 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp @@ -13,7 +13,7 @@ #include "LCAO_deepks.h" #include "module_base/parallel_reduce.h" -void LCAO_Deepks::cal_o_delta(const std::vector>& dm_hl) +void LCAO_Deepks::cal_o_delta(const std::vector>& dm_hl, const int nks) { ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta"); this->o_delta.zero_out(); @@ -31,7 +31,7 @@ void LCAO_Deepks::cal_o_delta(const std::vector> const int index = nu * pv->nrow + mu; for (int is = 0; is < PARAM.inp.nspin; ++is) { - this->o_delta(0,hl) += dm_hl[hl][is](nu, mu) * this->H_V_delta[index]; + this->o_delta(0,hl) += dm_hl[hl][is](nu, mu) * this->H_V_delta[0][index]; } } } @@ -44,7 +44,7 @@ void LCAO_Deepks::cal_o_delta(const std::vector> //calculating the correction of (LUMO-HOMO) energies, i.e., band gap corrections //for multi_k calculations -void LCAO_Deepks::cal_o_delta_k(const std::vector>& dm_hl, +void LCAO_Deepks::cal_o_delta(const std::vector>& dm_hl, const int nks) { ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta_k"); diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp index 2925dfc69a..29c1fd813e 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp @@ -14,8 +14,7 @@ //3. check_projected_dm, which prints pdm to descriptor.dat //4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point -//5. cal_gdmx_k, counterpart of 3, for multi-k -//6. check_gdmx, which prints gdmx to a series of .dat files +//5. check_gdmx, which prints gdmx to a series of .dat files #ifdef __DEEPKS @@ -317,7 +316,7 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix, double>* dm, +void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix, double>* dm, const UnitCell &ucell, const LCAO_Orbitals &orb, Grid_Driver& GridD) diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp index 97aef62322..5ec6a7acb4 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp @@ -153,8 +153,11 @@ void LCAO_Deepks::load_model(const std::string& deepks_model) { } // prepare_psialpha and prepare_gevdm for deepks_v_delta = 2 +template void LCAO_Deepks::prepare_psialpha(const int nlocal, const int nat, + const int nks, + const std::vector> &kvec_d, const UnitCell &ucell, const LCAO_Orbitals &orb, Grid_Driver &GridD) @@ -162,109 +165,15 @@ void LCAO_Deepks::prepare_psialpha(const int nlocal, ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha"); int nlmax = this->inlmax/nat; int mmax = 2*this->lmaxd+1; - this->psialpha_tensor = torch::zeros({ nat, nlmax, 1, nlocal, mmax }, torch::TensorOptions().dtype(torch::kFloat64)); // support gamma-only - - //cutoff for alpha is same for all types of atoms - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - - for (int T0 = 0; T0 < ucell.ntype; T0++) + if constexpr (std::is_same::value) { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) - { - //iat: atom index on which |alpha> is located - const int iat = ucell.itia2iat(T0,I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); - - //outermost loop : find all adjacent atoms - for (int ad=0; ad tau1 = GridD.getAdjacentTau(ad); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1) - { - continue; - } - - //middle loop : all atomic basis on the adjacent atom ad - for (int iw1=0; iw1global2local_row(iw1_all); - const int iw2_local = pv->global2local_col(iw1_all); - if(iw1_local < 0 || iw2_local < 0) {continue; -} - const int iw1_0 = iw1/PARAM.globalv.npol; - std::vector nlm = this->nlm_save[iat][ad][iw1][0]; - - int ib=0; - int nl=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int nm = 2*L0+1; - - for (int m1=0; m1psialpha_tensor[iat][nl][0][iw1_all][m1] = nlm[ib+m1]; - } - ib+=nm; - nl++; - } - } - }//end iw - }//end ad - }//end I0 - }//end T0 - -#ifdef __MPI - double msg[mmax]; - for(int iat=0; iat< nat ; iat++) + this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kFloat64); // support gamma-only + } + else { - for(int nl = 0; nl < nlmax; nl++) - { - for(int mu = 0; mu < nlocal ; mu++) - { - for(int m=0;mpsialpha_tensor[iat][nl][0][mu][m].item().toDouble(); - } - Parallel_Reduce::reduce_all(msg,mmax); - for(int m=0;mpsialpha_tensor[iat][nl][0][mu][m] = msg[m]; - } - } - } + this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kComplexDouble); // support multi-k } -#endif -} - -void LCAO_Deepks::prepare_psialpha_k(const int nlocal, - const int nat, - const int nks, - const std::vector> &kvec_d, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - Grid_Driver &GridD) -{ - ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha"); - int nlmax = this->inlmax/nat; - int mmax = 2*this->lmaxd+1; - this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kComplexDouble); // support multi-k - //cutoff for alpha is same for all types of atoms const double Rcut_Alpha = orb.Alpha[0].getRcut(); @@ -304,10 +213,13 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, key_tuple key(ibt, dR.x, dR.y, dR.z); - if (this->nlm_save_k[iat].find(key) - == this->nlm_save_k[iat].end()) + if constexpr (std::is_same>::value) { - continue; + if (this->nlm_save_k[iat].find(key) + == this->nlm_save_k[iat].end()) + { + continue; + } } //middle loop : all atomic basis on the adjacent atom ad @@ -318,14 +230,26 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, const int iw2_local = pv->global2local_col(iw1_all); if(iw1_local < 0 || iw2_local < 0) {continue;} const int iw1_0 = iw1/PARAM.globalv.npol; - std::vector nlm = this->nlm_save_k[iat][key][iw1][0]; - + std::vector nlm; + if constexpr (std::is_same::value) + { + nlm = this->nlm_save[iat][ad][iw1][0]; + } + else + { + nlm = this->nlm_save_k[iat][key][iw1][0]; + } + for (int ik = 0; ik kphase = std::complex(cosp, sinp); + std::complex kphase = std::complex(1.0, 0.0); + if constexpr (std::is_same>::value) + { + const double arg = - (kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + kphase = std::complex(cosp, sinp); + } int ib=0; int nl=0; for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) @@ -335,11 +259,18 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, const int nm = 2*L0+1; for (int m1=0; m1 nlm_phase = nlm[ib + m1] * kphase; - torch::Tensor nlm_tensor = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble); - torch::Tensor complex_tensor = torch::complex(nlm_tensor[0], nlm_tensor[1]); - this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = complex_tensor; + { + if constexpr (std::is_same::value) + { + this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = nlm[ib+m1]; + } + else + { + std::complex nlm_phase = nlm[ib + m1] * kphase; + torch::Tensor nlm_tensor = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble); + torch::Tensor complex_tensor = torch::complex(nlm_tensor[0], nlm_tensor[1]); + this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = complex_tensor; + } } ib+=nm; nl++; @@ -352,7 +283,7 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, }//end T0 #ifdef __MPI - std::complex msg[mmax]; + TK msg[mmax]; for(int iat=0; iat< nat ; iat++) { for(int nl = 0; nl < nlmax; nl++) @@ -363,15 +294,29 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, { for(int m=0;mpsialpha_tensor.index({iat, nl, ik, mu, m}); - msg[m] = std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()); + if constexpr (std::is_same::value) + { + msg[m] = this->psialpha_tensor[iat][nl][ik][mu][m].item().toDouble(); + } + else + { + auto tensor_value = this->psialpha_tensor.index({iat, nl, ik, mu, m}); + msg[m] = std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()); + } } Parallel_Reduce::reduce_all(msg,mmax); for(int m=0;mpsialpha_tensor[iat][nl][ik][mu][m] = complex_tensor; + if constexpr (std::is_same::value) + { + this->psialpha_tensor[iat][nl][ik][mu][m] = msg[m]; + } + else + { + torch::Tensor msg_tensor = torch::tensor({msg[m].real(), msg[m].imag()}, torch::kDouble); + torch::Tensor complex_tensor = torch::complex(msg_tensor[0], msg_tensor[1]); + this->psialpha_tensor[iat][nl][ik][mu][m] = complex_tensor; + } } } } @@ -381,6 +326,7 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal, #endif } +template void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlocal) { std::ofstream ofs("vdp_psialpha.dat"); @@ -398,7 +344,15 @@ void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlo { for(int m=0; m< mmax; m++) { - ofs << this->psialpha_tensor.index({ iat,nl, iks, mu, m }).item().toDouble() << " "; + if constexpr (std::is_same::value) + { + ofs << this->psialpha_tensor.index({ iat, nl, iks, mu, m }).item().toDouble() << " "; + } + else + { + auto tensor_value = this->psialpha_tensor.index({iat, nl, iks, mu, m}); + ofs << std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()) << " "; + } } } } @@ -470,4 +424,23 @@ void LCAO_Deepks::check_vdp_gevdm(const int nat) ofs.close(); } +template void LCAO_Deepks::prepare_psialpha(const int nlocal, + const int nat, + const int nks, + const std::vector> &kvec_d, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver &GridD); + +template void LCAO_Deepks::prepare_psialpha>(const int nlocal, + const int nat, + const int nks, + const std::vector> &kvec_d, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver &GridD); + +template void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlocal); +template void LCAO_Deepks::check_vdp_psialpha>(const int nat, const int nks, const int nlocal); + #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp index 459ff5c10d..9a1f151f70 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp @@ -18,7 +18,7 @@ //calculating sum of correction band energies //for gamma_only calculations -void LCAO_Deepks::cal_e_delta_band(const std::vector>& dm) +void LCAO_Deepks::cal_e_delta_band(const std::vector>& dm, const int /*nks*/) { ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band"); this->e_delta_band = 0; @@ -35,7 +35,7 @@ void LCAO_Deepks::cal_e_delta_band(const std::vector>& dm) for (int is = 0; is < dm.size(); ++is) //dm.size() == PARAM.inp.nspin { //this->e_delta_band += dm[is](nu, mu) * this->H_V_delta[index]; - this->e_delta_band += dm[is][nu*this->pv->nrow+mu] * this->H_V_delta[index]; + this->e_delta_band += dm[is][nu*this->pv->nrow+mu] * this->H_V_delta[0][index]; } } } @@ -48,7 +48,7 @@ void LCAO_Deepks::cal_e_delta_band(const std::vector>& dm) //calculating sum of correction band energies //for multi_k calculations -void LCAO_Deepks::cal_e_delta_band_k(const std::vector>>& dm, +void LCAO_Deepks::cal_e_delta_band(const std::vector>>& dm, const int nks) { ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band"); diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp index 6950e030ea..e28af0685c 100644 --- a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp +++ b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp @@ -14,10 +14,13 @@ /// be calculated: /// gdm_epsl = d/d\epsilon_{ab} * /// sum_{mu,nu} rho_{mu,nu} -void LCAO_Deepks::cal_gdmx(const std::vector& dm, +template +void LCAO_Deepks::cal_gdmx(const std::vector>& dm, const UnitCell &ucell, const LCAO_Orbitals &orb, Grid_Driver& GridD, + const int nks, + const std::vector>& kvec_d, const bool isstress) { ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx"); @@ -70,6 +73,8 @@ void LCAO_Deepks::cal_gdmx(const std::vector& dm, const int nw1_tot = atom1->nw*PARAM.globalv.npol; const double Rcut_AO1 = orb.Phi[T1].getRcut(); + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); + for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) { const int T2 = GridD.getType(ad2); @@ -79,6 +84,7 @@ void LCAO_Deepks::cal_gdmx(const std::vector& dm, const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); const Atom* atom2 = &ucell.atoms[T2]; const int nw2_tot = atom2->nw*PARAM.globalv.npol; + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); const double Rcut_AO2 = orb.Phi[T2].getRcut(); const double dist1 = (tau1-tau0).norm() * ucell.lat0; @@ -104,25 +110,68 @@ void LCAO_Deepks::cal_gdmx(const std::vector& dm, auto row_indexes = pv->get_indexes_row(ibt1); auto col_indexes = pv->get_indexes_col(ibt2); if(row_indexes.size() * col_indexes.size() == 0) continue; - - hamilt::AtomPair dm_pair(ibt1, ibt2, 0, 0, 0, pv); - dm_pair.allocate(nullptr, 1); - if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + + double* dm_current; + int dRx, dRy, dRz; + if constexpr (std::is_same::value) { - dm_pair.add_from_matrix(dm.data(), pv->get_row_size(), 1.0, 1); + dRx = 0; + dRy = 0; + dRz = 0; } else { - dm_pair.add_from_matrix(dm.data(), pv->get_col_size(), 1.0, 0); + dRx = (dR2-dR1).x; + dRy = (dR2-dR1).y; + dRz = (dR2-dR1).z; } - const double* dm_current = dm_pair.get_pointer(); + hamilt::AtomPair dm_pair(ibt1, ibt2, dRx, dRy, dRz, pv); + dm_pair.allocate(nullptr, 1); + for(int ik=0;ik::value) + { + kphase = 1.0; + } + else + { + const double arg = - (kvec_d[ik] * (dR2-dR1) ) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + kphase = TK(cosp, sinp); + } + if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + { + dm_pair.add_from_matrix(dm[ik].data(), pv->get_row_size(), kphase, 1); + } + else + { + dm_pair.add_from_matrix(dm[ik].data(), pv->get_col_size(), kphase, 0); + } + } + + dm_current = dm_pair.get_pointer(); + key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z); + key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z); for (int iw1=0; iw1 nlm1 = this->nlm_save[iat][ad1][row_indexes[iw1]][0]; - std::vector> nlm2 = this->nlm_save[iat][ad2][col_indexes[iw2]]; + std::vector nlm1; + std::vector> nlm2; + + if constexpr (std::is_same::value) + { + nlm1 = this->nlm_save[iat][ad1][row_indexes[iw1]][0]; + nlm2 = this->nlm_save[iat][ad2][col_indexes[iw2]]; + } + else + { + nlm1 = this->nlm_save_k[iat][key_1][row_indexes[iw1]][0]; + nlm2 = this->nlm_save_k[iat][key_2][col_indexes[iw2]]; + } assert(nlm1.size()==nlm2[0].size()); @@ -178,8 +227,16 @@ void LCAO_Deepks::cal_gdmx(const std::vector& dm, assert(ib==nlm1.size()); if (isstress) { - nlm1 = this->nlm_save[iat][ad2][col_indexes[iw2]][0]; - nlm2 = this->nlm_save[iat][ad1][row_indexes[iw1]]; + if constexpr (std::is_same::value) + { + nlm1 = this->nlm_save[iat][ad2][col_indexes[iw2]][0]; + nlm2 = this->nlm_save[iat][ad1][row_indexes[iw1]]; + } + else + { + nlm1 = this->nlm_save_k[iat][key_2][col_indexes[iw2]][0]; + nlm2 = this->nlm_save_k[iat][key_1][row_indexes[iw1]]; + } assert(nlm1.size()==nlm2[0].size()); int ib=0; @@ -278,4 +335,20 @@ void LCAO_Deepks::check_gdmx(const int nat) } } +template void LCAO_Deepks::cal_gdmx(const std::vector>& dm, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver& GridD, + const int nks, + const std::vector>& kvec_d, + const bool isstress); + +template void LCAO_Deepks::cal_gdmx>(const std::vector>>& dm, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver& GridD, + const int nks, + const std::vector>& kvec_d, + const bool isstress); + #endif diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp deleted file mode 100644 index 62a3a25c26..0000000000 --- a/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp +++ /dev/null @@ -1,249 +0,0 @@ -#ifdef __DEEPKS - -#include "module_parameter/parameter.h" -#include "LCAO_deepks.h" -#include "module_base/vector3.h" -#include "module_base/timer.h" -#include "module_base/constants.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" -#include "module_base/libm/libm.h" - -void LCAO_Deepks::cal_gdmx_k(const std::vector>>& dm, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - Grid_Driver& GridD, - const int nks, - const std::vector> &kvec_d, - const bool isstress) -{ - ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx_k"); - ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx_k"); - - const int size = (2 * lmaxd + 1) * (2 * lmaxd + 1); - - for (int iat = 0;iat < ucell.nat;iat++) - { - for (int inl = 0;inl < inlmax;inl++) - { - ModuleBase::GlobalFunc::ZEROS(gdmx[iat][inl], size); - ModuleBase::GlobalFunc::ZEROS(gdmy[iat][inl], size); - ModuleBase::GlobalFunc::ZEROS(gdmz[iat][inl], size); - } - } - - if (isstress) - { - for (int ipol = 0;ipol < 6;ipol++) - { - for (int inl = 0;inl < inlmax;inl++) - { - ModuleBase::GlobalFunc::ZEROS(gdm_epsl[ipol][inl], size); - } - } - } - - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - int nrow = this->pv->nrow; - for (int T0 = 0; T0 < ucell.ntype; T0++) - { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) - { - const int iat = ucell.itia2iat(T0,I0);//on which alpha is located - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); - - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); - - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const int ibt2 = ucell.itia2iat(T2,I2); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1 - || dist2 > Rcut_Alpha + Rcut_AO2) - { - continue; - } - - double r0[3]; - double r1[3]; - if(isstress) - { - r1[0] = ( tau1.x - tau0.x) ; - r1[1] = ( tau1.y - tau0.y) ; - r1[2] = ( tau1.z - tau0.z) ; - r0[0] = ( tau2.x - tau0.x) ; - r0[1] = ( tau2.y - tau0.y) ; - r0[2] = ( tau2.z - tau0.z) ; - } - - auto row_indexes = pv->get_indexes_row(ibt1); - auto col_indexes = pv->get_indexes_col(ibt2); - if(row_indexes.size() * col_indexes.size() == 0) continue; - - hamilt::AtomPair dm_pair(ibt1, ibt2, (dR2-dR1).x, (dR2-dR1).y, (dR2-dR1).z, pv); - dm_pair.allocate(nullptr, 1); - for(int ik=0;ik kphase = std::complex(cosp, sinp); - if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) - { - dm_pair.add_from_matrix(dm[ik].data(), pv->get_row_size(), kphase, 1); - } - else - { - dm_pair.add_from_matrix(dm[ik].data(), pv->get_col_size(), kphase, 0); - } - } - const double* dm_current = dm_pair.get_pointer(); - - key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z); - key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z); - for (int iw1l = 0; iw1l < row_indexes.size(); ++iw1l) - { - for (int iw2l = 0; iw2l < col_indexes.size(); ++iw2l) - { - std::vector nlm1 = this->nlm_save_k[iat][key_1][row_indexes[iw1l]][0]; - std::vector> nlm2 = this->nlm_save_k[iat][key_2][col_indexes[iw2l]]; - - assert(nlm1.size()==nlm2[0].size()); - - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < 2 * L0 + 1;++m1) - { - for (int m2 = 0; m2 < 2 * L0 + 1; ++m2) - { - //() - gdmx[iat][inl][m1*nm+m2] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[iat][inl][m1*nm+m2] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[iat][inl][m1*nm+m2] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; - - //() - gdmx[iat][inl][m2*nm+m1] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[iat][inl][m2*nm+m1] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[iat][inl][m2*nm+m1] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; - - //() = -() - gdmx[ibt2][inl][m1*nm+m2] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[ibt2][inl][m1*nm+m2] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[ibt2][inl][m1*nm+m2] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; - - //() = -() - gdmx[ibt2][inl][m2*nm+m1] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[ibt2][inl][m2*nm+m1] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[ibt2][inl][m2*nm+m1] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; - - if (isstress) - { - int mm = 0; - for(int ipol=0;ipol<3;ipol++) - { - for(int jpol=ipol;jpol<3;jpol++) - { - gdm_epsl[mm][inl][m2*nm+m1] += ucell.lat0 * - *dm_current * (nlm2[jpol+1][ib+m2] * nlm1[ib+m1] * r0[ipol]); - mm++; - } - } - } - - } - } - ib+=nm; - } - } - assert(ib==nlm1.size()); - - if (isstress) - { - nlm1 = this->nlm_save_k[iat][key_2][col_indexes[iw2l]][0]; - nlm2 = this->nlm_save_k[iat][key_1][row_indexes[iw1l]]; - - assert(nlm1.size()==nlm2[0].size()); - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm; ++m1) - { - for (int m2 = 0; m2 < nm; ++m2) - { - int mm = 0; - for(int ipol=0;ipol<3;ipol++) - { - for(int jpol=ipol;jpol<3;jpol++) - { - gdm_epsl[mm][inl][m2*nm+m1] += ucell.lat0 * - *dm_current * (nlm1[ib+m1] * nlm2[jpol+1][ib+m2] * r1[ipol]); - mm++; - } - } - } - } - ib+=nm; - } - } - } - dm_current++; - }//iw2 - }//iw1 - }//ad2 - }//ad1 - }//I0 - }//T0 - -#ifdef __MPI - for(int iat=0;iatinlmax,size,this->gdmx[iat]); - allsum_deepks(this->inlmax,size,this->gdmy[iat]); - allsum_deepks(this->inlmax,size,this->gdmz[iat]); - } - if (isstress) - { - for(int ipol=0;ipol<6;ipol++) - { - allsum_deepks(this->inlmax,size,this->gdm_epsl[ipol]); - } - } -#endif - ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx_k"); - return; -} - -#endif diff --git a/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp b/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp index 66125bf042..54d143687c 100644 --- a/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp +++ b/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp @@ -4,98 +4,11 @@ #include "LCAO_deepks.h" #include "module_base/parallel_reduce.h" - -void DeePKS_domain::save_h_mat( - const double *h_mat_in, - const int nloc, - const int ik) -{ - for(int i=0;i *h_mat_in, - const int nloc, - const int ik) -{ - for(int i=0;i void DeePKS_domain::collect_h_mat( const Parallel_Orbitals &pv, - const std::vector& h_in, - ModuleBase::matrix &h_out, - const int nlocal) -{ - ModuleBase::TITLE("DeePKS_domain", "collect_h_tot"); - - //construct the total H matrix -#ifdef __MPI - int ir=0; - int ic=0; - for (int i=0; i lineH(nlocal-i,0.0); - - ir = pv.global2local_row(i); - if (ir>=0) - { - // data collection - for (int j=i; j=0) - { - int iic=0; - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) - { - iic=ir+ic*pv.nrow; - } - else - { - iic=ir*pv.ncol+ic; - } - lineH[j-i] = h_in[iic]; - } - } - } - else - { - //do nothing - } - - Parallel_Reduce::reduce_all(lineH.data(),nlocal-i); - - for (int j=i; j>>& h_in, - std::vector &h_out, + const std::vector>& h_in, + std::vector &h_out, const int nlocal, const int nks) { @@ -109,7 +22,7 @@ void DeePKS_domain::collect_h_mat( int ic=0; for (int i=0; i> lineH(nlocal-i, std::complex(0.0, 0.0)); + std::vector lineH(nlocal-i, TK(0.0)); ir = pv.global2local_row(i); if (ir>=0) @@ -159,28 +72,9 @@ void DeePKS_domain::collect_h_mat( } } -//just for gamma-only now -void DeePKS_domain::check_h_mat( - const ModuleBase::matrix &H, - const std::string &h_file, - const int nlocal) -{ - std::ofstream ofs(h_file.c_str()); - ofs << std::setprecision(10); - for (int i=0; i void DeePKS_domain::check_h_mat( - const std::vector &H, + const std::vector &H, const std::string &h_file, const int nlocal, const int nks) @@ -202,4 +96,30 @@ void DeePKS_domain::check_h_mat( ofs.close(); } +template void DeePKS_domain::collect_h_mat( + const Parallel_Orbitals &pv, + const std::vector>& h_in, + std::vector &h_out, + const int nlocal, + const int nks); + +template void DeePKS_domain::collect_h_mat, ModuleBase::ComplexMatrix>( + const Parallel_Orbitals &pv, + const std::vector>>& h_in, + std::vector &h_out, + const int nlocal, + const int nks); + +template void DeePKS_domain::check_h_mat( + const std::vector &H, + const std::string &h_file, + const int nlocal, + const int nks); + +template void DeePKS_domain::check_h_mat( + const std::vector &H, + const std::string &h_file, + const int nlocal, + const int nks); + #endif diff --git a/source/module_hamilt_lcao/module_deepks/deepks_hmat.h b/source/module_hamilt_lcao/module_deepks/deepks_hmat.h index fe4d51dea5..9668d5ede9 100644 --- a/source/module_hamilt_lcao/module_deepks/deepks_hmat.h +++ b/source/module_hamilt_lcao/module_deepks/deepks_hmat.h @@ -14,37 +14,19 @@ namespace DeePKS_domain { - void save_h_mat( - const double *h_mat_in, - const int nloc, - const int ik); - - void save_h_mat( - const std::complex *h_mat_in, - const int nloc, - const int ik); - //Collect data in h_in to matrix h_out. Note that left lower trianger in h_out is filled + template void collect_h_mat( const Parallel_Orbitals &pv, - const std::vector& h_in, - ModuleBase::matrix &h_out, - const int nlocal); - - void collect_h_mat( - const Parallel_Orbitals &pv, - const std::vector>>& h_in, - std::vector &h_out, - const int nlocal, + const std::vector>& h_in, + std::vector &h_out, + const int nlocal, const int nks); + // write h_mat to file h_file for checking // not used in the code now + template void check_h_mat( - const ModuleBase::matrix &H, - const std::string &h_file, - const int nlocal); - - void check_h_mat( - const std::vector &H, + const std::vector &H, const std::string &h_file, const int nlocal, const int nks); diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp index 0e01573331..5f202d1532 100644 --- a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp +++ b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp @@ -3,9 +3,6 @@ /// cal_orbital_precalc : orbital_precalc is usted for training with orbital label, /// which equals gvdm * orbital_pdm_shell, /// orbital_pdm_shell[1,Inl,nm*nm] = dm_hl * overlap * overlap -/// cal_orbital_precalc_k : orbital_precalc is usted for training with orbital label, -/// for multi-k case, which equals gvdm * orbital_pdm_shell, -/// orbital_pdm_shell[1,Inl,nm*nm] = dm_hl_k * overlap * overlap #include "LCAO_deepks.h" #include "LCAO_deepks_io.h" // mohan add 2024-07-22 @@ -18,21 +15,24 @@ // calculates orbital_precalc[1,NAt,NDscrpt] = gvdm * orbital_pdm_shell; // orbital_pdm_shell[2,Inl,nm*nm] = dm_hl * overlap * overlap; +template void LCAO_Deepks::cal_orbital_precalc( - const std::vector>& dm_hl, + const std::vector>& dm_hl, const int nat, + const int nks, + const std::vector>& kvec_d, const UnitCell& ucell, const LCAO_Orbitals& orb, Grid_Driver& GridD) { - ModuleBase::TITLE("LCAO_Deepks", "cal_orbital_precalc"); + ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc"); this->cal_gvdm(nat); const double Rcut_Alpha = orb.Alpha[0].getRcut(); - this->init_orbital_pdm_shell(1); + this->init_orbital_pdm_shell(nks); for (int T0 = 0; T0 < ucell.ntype; T0++) { @@ -85,6 +85,21 @@ void LCAO_Deepks::cal_orbital_precalc( continue; } + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, + GridD.getBox(ad1).y, + GridD.getBox(ad1).z); + + key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); + + if constexpr (std::is_same>::value) + { + if (this->nlm_save_k[iat].find(key_1) + == this->nlm_save_k[iat].end()) + { + continue; + } + } + auto row_indexes = pv->get_indexes_row(ibt1); const int row_size = row_indexes.size(); @@ -96,12 +111,19 @@ void LCAO_Deepks::cal_orbital_precalc( std::vector s_1t(trace_alpha_size * row_size); - std::vector g_1dmt(trace_alpha_size * row_size, 0.0); + std::vector g_1dmt(nks * trace_alpha_size * row_size, 0.0); for (int irow = 0; irow < row_size; irow++) { - const double* row_ptr - = this->nlm_save[iat][ad1][row_indexes[irow]][0].data(); + double* row_ptr; + if constexpr (std::is_same::value) + { + row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data(); + } + else + { + row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data(); + } for (int i = 0; i < trace_alpha_size; i++) { s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]]; @@ -113,6 +135,13 @@ void LCAO_Deepks::cal_orbital_precalc( const int T2 = GridD.getType(ad2); const int I2 = GridD.getNatom(ad2); const int ibt2 = ucell.itia2iat(T2, I2); + if constexpr (std::is_same>::value) // Why only for multi-k? + { + if (ibt1 > ibt2) + { + continue; + } + } const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); const Atom* atom2 = &ucell.atoms[T2]; @@ -126,6 +155,21 @@ void LCAO_Deepks::cal_orbital_precalc( continue; } + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, + GridD.getBox(ad2).y, + GridD.getBox(ad2).z); + + key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); + + if constexpr (std::is_same>::value) + { + if (this->nlm_save_k[iat].find(key_2) + == this->nlm_save_k[iat].end()) + { + continue; + } + } + auto col_indexes = pv->get_indexes_col(ibt2); const int col_size = col_indexes.size(); if (col_size == 0) @@ -136,8 +180,15 @@ void LCAO_Deepks::cal_orbital_precalc( std::vector s_2t(trace_alpha_size * col_size); for (int icol = 0; icol < col_size; icol++) { - const double* col_ptr - = this->nlm_save[iat][ad2][col_indexes[icol]][0].data(); + double* col_ptr; + if constexpr (std::is_same::value) + { + col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data(); + } + else + { + col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data(); + } for (int i = 0; i < trace_alpha_size; i++) { s_2t[i * col_size + icol] @@ -145,40 +196,94 @@ void LCAO_Deepks::cal_orbital_precalc( } } - std::vector dm_array(row_size * col_size, 0.0); - for (int is = 0; is < PARAM.inp.nspin; is++) - { - hamilt::AtomPair dm_pair(ibt1, - ibt2, - 0, - 0, - 0, - pv); + std::vector dm_array(row_size * nks * col_size, 0.0); - dm_pair.allocate(dm_array.data(), 0); + const int row_size_nks = row_size * nks; - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + if constexpr (std::is_same::value) + { + for (int is = 0; is < PARAM.inp.nspin; is++) { - dm_pair.add_from_matrix(dm_hl[0][is].c, - pv->get_row_size(), - 1.0, - 1); - } - else + hamilt::AtomPair dm_pair(ibt1, + ibt2, + 0, + 0, + 0, + pv); + + dm_pair.allocate(dm_array.data(), 0); + + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + { + dm_pair.add_from_matrix(dm_hl[0][is].c, + pv->get_row_size(), + 1.0, + 1); + } + else + { + dm_pair.add_from_matrix(dm_hl[0][is].c, + pv->get_col_size(), + 1.0, + 0); + } + } // is + } + else + { + for (int ik = 0; ik < nks; ik++) { - dm_pair.add_from_matrix(dm_hl[0][is].c, - pv->get_col_size(), - 1.0, - 0); - } - } // is + hamilt::AtomPair dm_pair(ibt1, + ibt2, + (dR2 - dR1).x, + (dR2 - dR1).y, + (dR2 - dR1).z, + pv); + + dm_pair.allocate(&dm_array[ik * row_size * col_size], 0); + + const double arg + = -(kvec_d[ik] * (dR2 - dR1)) * ModuleBase::TWO_PI; + + double sinp, cosp; + + ModuleBase::libm::sincos(arg, &sinp, &cosp); + + const std::complex kphase + = std::complex(cosp, sinp); + + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + { + dm_pair.add_from_matrix(dm_hl[0][ik].c, + pv->get_row_size(), + kphase, + 1); + } + else + { + dm_pair.add_from_matrix(dm_hl[0][ik].c, + pv->get_col_size(), + kphase, + 0); + } + } // ik + } // dgemm for s_2t and dm_array to get g_1dmt constexpr char transa = 'T', transb = 'N'; - const double gemm_alpha = 1.0, gemm_beta = 1.0; + double gemm_alpha = 1.0, gemm_beta = 1.0; + + if constexpr (std::is_same>::value) + { + if (ibt1 < ibt2) + { + gemm_alpha = 2.0; + } + } + dgemm_(&transa, &transb, - &row_size, + &row_size_nks, &trace_alpha_size, &col_size, &gemm_alpha, @@ -188,46 +293,54 @@ void LCAO_Deepks::cal_orbital_precalc( &col_size, &gemm_beta, g_1dmt.data(), - &row_size); + &row_size_nks); } // ad2 - // do dot of g_1dmt and s_1t to get orbital_pdm_shell - const double* p_g1dmt = g_1dmt.data(); + for (int ik = 0; ik < nks; ik++) + { + // do dot of g_1dmt and s_1t to get orbital_pdm_shell + const double* p_g1dmt = g_1dmt.data() + ik * row_size; - int ib = 0, index = 0, inc = 1; + int ib = 0, index = 0, inc = 1; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) - { - for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2 * L0 + 1; - - for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { - for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d + const int inl = this->inl_index[T0](I0, L0, N0); + const int nm = 2 * L0 + 1; + + for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d { - orbital_pdm_shell[0][0][inl][m1 * nm + m2] - += ddot_(&row_size, - p_g1dmt + index * row_size, - &inc, - s_1t.data() + index * row_size, - &inc); - index++; + for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d + { + orbital_pdm_shell[ik][0][inl][m1 * nm + m2] + += ddot_(&row_size, + p_g1dmt + index * row_size * nks, + &inc, + s_1t.data() + index * row_size, + &inc); + index++; + } } + ib += nm; } - ib += nm; } } } // ad1 } } #ifdef __MPI - for (int inl = 0; inl < this->inlmax; inl++) + for (int iks = 0; iks < nks; iks++) { - Parallel_Reduce::reduce_all(this->orbital_pdm_shell[0][0][inl], - (2 * this->lmaxd + 1) - * (2 * this->lmaxd + 1)); + for (int hl = 0; hl < 1; hl++) + { + for (int inl = 0; inl < this->inlmax; inl++) + { + Parallel_Reduce::reduce_all(this->orbital_pdm_shell[iks][hl][inl], + (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)); + } + } } #endif @@ -239,33 +352,35 @@ void LCAO_Deepks::cal_orbital_precalc( for (int nl = 0; nl < nlmax; ++nl) { std::vector kiammv; - for (int iks = 0; iks < 1; ++iks) + for (int iks = 0; iks < nks; ++iks) { std::vector iammv; - std::vector ammv; - for (int iat = 0; iat < nat; ++iat) + for (int hl = 0; hl < 1; ++hl) { - int inl = iat * nlmax + nl; - int nm = 2 * this->inl_l[inl] + 1; - std::vector mmv; - - for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d + std::vector ammv; + for (int iat = 0; iat < nat; ++iat) { - for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d + int inl = iat * nlmax + nl; + int nm = 2 * this->inl_l[inl] + 1; + std::vector mmv; + + for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d { - mmv.push_back( - this->orbital_pdm_shell[iks][0][inl][m1 * nm + m2]); + for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d + { + mmv.push_back( + this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]); + } } - } - torch::Tensor mm = torch::tensor(mmv, - torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm + torch::Tensor mm = torch::tensor(mmv, + torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm - ammv.push_back(mm); + ammv.push_back(mm); + } + torch::Tensor amm = torch::stack(ammv, 0); + iammv.push_back(amm); } - torch::Tensor amm = torch::stack(ammv, 0); - iammv.push_back(amm); torch::Tensor iamm = torch::stack(iammv, 0); // inl*nm*nm - // orbital_pdm_shell_vector.push_back(iamm); kiammv.push_back(iamm); } torch::Tensor kiamm = torch::stack(kiammv, 0); @@ -284,8 +399,25 @@ void LCAO_Deepks::cal_orbital_precalc( } this->orbital_precalc_tensor = torch::cat(orbital_precalc_vector, -1); - this->del_orbital_pdm_shell(1); + this->del_orbital_pdm_shell(nks); + ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc"); return; } +template void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, + const int nat, + const int nks, + const std::vector>& kvec_d, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + Grid_Driver& GridD); + + +template void LCAO_Deepks::cal_orbital_precalc, ModuleBase::ComplexMatrix>(const std::vector>& dm_hl, + const int nat, + const int nks, + const std::vector>& kvec_d, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + Grid_Driver& GridD); #endif diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp deleted file mode 100644 index fd8b24e0c7..0000000000 --- a/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp +++ /dev/null @@ -1,350 +0,0 @@ -#ifdef __DEEPKS - -#include "LCAO_deepks.h" -#include "LCAO_deepks_io.h" // mohan add 2024-07-22 -#include "module_base/blas_connector.h" -#include "module_base/constants.h" -#include "module_base/libm/libm.h" -#include "module_base/parallel_reduce.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" -#include "module_parameter/parameter.h" - -// calculates orbital_precalc[nks,2,NAt,NDscrpt] = gvdm * orbital_pdm_shell for -// multi-k case; orbital_pdm_shell[nks,2,Inl,nm*nm] = dm_hl_k * overlap * -// overlap; -void LCAO_Deepks::cal_orbital_precalc_k( - const std::vector>& dm_hl_k, - const int nat, - const int nks, - const std::vector>& kvec_d, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - Grid_Driver& GridD) { - ModuleBase::TITLE("LCAO_Deepks", "calc_orbital_precalc_k"); - ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc_k"); - - this->cal_gvdm(nat); - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - this->init_orbital_pdm_shell(nks); - - for (int T0 = 0; T0 < ucell.ntype; T0++) - { - Atom* atom0 = &ucell.atoms[T0]; - - for (int I0 = 0; I0 < atom0->na; I0++) - { - const int iat = ucell.itia2iat(T0, I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - - // trace alpha orbital - std::vector trace_alpha_row; - std::vector trace_alpha_col; - int ib = 0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) - { - for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) - { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2 * L0 + 1; - - for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d - { - for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d - { - trace_alpha_row.push_back(ib + m1); - trace_alpha_col.push_back(ib + m2); - } - } - ib += nm; - } - } - const int trace_alpha_size = trace_alpha_row.size(); - - for (int ad1 = 0; ad1 < GridD.getAdjacentNum() + 1; ++ad1) - { - const int T1 = GridD.getType(ad1); - const int I1 = GridD.getNatom(ad1); - const int ibt1 = ucell.itia2iat(T1, I1); - const ModuleBase::Vector3 tau1 - = GridD.getAdjacentTau(ad1); - const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - if (dist1 >= Rcut_Alpha + Rcut_AO1) - { - continue; - } - - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw * PARAM.globalv.npol; - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, - GridD.getBox(ad1).y, - GridD.getBox(ad1).z); - - auto row_indexes = pv->get_indexes_row(ibt1); - const int row_size = row_indexes.size(); - if (row_size == 0) - { - continue; - } - - key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); - if (this->nlm_save_k[iat].find(key_1) - == this->nlm_save_k[iat].end()) - { - continue; - } - - std::vector s_1t(trace_alpha_size * row_size); - - std::vector g_1dmt(nks * trace_alpha_size * row_size, 0.0); - - for (int irow = 0; irow < row_size; irow++) - { - const double* row_ptr - = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data(); - - for (int i = 0; i < trace_alpha_size; i++) - { - s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]]; - } - } - - for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int ibt2 = ucell.itia2iat(T2, I2); - if (ibt1 > ibt2) - { - continue; - } - - const ModuleBase::Vector3 tau2 - = GridD.getAdjacentTau(ad2); - - const Atom* atom2 = &ucell.atoms[T2]; - - const int nw2_tot = atom2->nw * PARAM.globalv.npol; - - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, - GridD.getBox(ad2).y, - GridD.getBox(ad2).z); - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist2 = (tau2 - tau0).norm() * ucell.lat0; - - if (dist2 >= Rcut_Alpha + Rcut_AO2) - { - continue; - } - - auto col_indexes = pv->get_indexes_col(ibt2); - const int col_size = col_indexes.size(); - if (col_size == 0) - { - continue; - } - - std::vector s_2t(trace_alpha_size * col_size); - key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); - - if (this->nlm_save_k[iat].find(key_2) - == this->nlm_save_k[iat].end()) - { - continue; - } - - for (int icol = 0; icol < col_size; icol++) - { - const double* col_ptr - = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data(); - - for (int i = 0; i < trace_alpha_size; i++) - { - s_2t[i * col_size + icol] - = col_ptr[trace_alpha_col[i]]; - } - } - - std::vector dm_array(row_size * nks * col_size, 0.0); - - const int row_size_nks = row_size * nks; - - for (int ik = 0; ik < nks; ik++) - { - hamilt::AtomPair dm_pair(ibt1, - ibt2, - (dR2 - dR1).x, - (dR2 - dR1).y, - (dR2 - dR1).z, - pv); - - dm_pair.allocate(&dm_array[ik * row_size * col_size], 0); - - const double arg - = -(kvec_d[ik] * (dR2 - dR1)) * ModuleBase::TWO_PI; - - double sinp, cosp; - - ModuleBase::libm::sincos(arg, &sinp, &cosp); - - const std::complex kphase - = std::complex(cosp, sinp); - - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) - { - dm_pair.add_from_matrix(dm_hl_k[0][ik].c, - pv->get_row_size(), - kphase, - 1); - } - else - { - dm_pair.add_from_matrix(dm_hl_k[0][ik].c, - pv->get_col_size(), - kphase, - 0); - } - } // ik - - // dgemm for s_2t and dm_array to get g_1dmt - constexpr char transa = 'T', transb = 'N'; - double gemm_alpha = 1.0, gemm_beta = 1.0; - - if (ibt1 < ibt2) - { - gemm_alpha = 2.0; - } - - dgemm_(&transa, - &transb, - &row_size_nks, - &trace_alpha_size, - &col_size, - &gemm_alpha, - dm_array.data(), - &col_size, - s_2t.data(), - &col_size, - &gemm_beta, - g_1dmt.data(), - &row_size_nks); - } // ad2 - - for (int ik = 0; ik < nks; ik++) - { - // do dot of g_1dmt and s_1t to get orbital_pdm_shell - const double* p_g1dmt = g_1dmt.data() + ik * row_size; - - int ib = 0, index = 0, inc = 1; - - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) - { - for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) - { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2 * L0 + 1; - - for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d - { - for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d - { - orbital_pdm_shell[ik][0][inl][m1 * nm + m2] - += ddot_(&row_size, - p_g1dmt - + index * row_size * nks, - &inc, - s_1t.data() + index * row_size, - &inc); - index++; - } - } - ib += nm; - } - } - } - } // ad1 - } - } - -#ifdef __MPI - for (int iks = 0; iks < nks; iks++) - { - for (int hl = 0; hl < 1; hl++) - { - for (int inl = 0; inl < this->inlmax; inl++) - { - Parallel_Reduce::reduce_all( - this->orbital_pdm_shell[iks][hl][inl], - (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)); - } - } - } -#endif - - // transfer orbital_pdm_shell to orbital_pdm_shell_vector - - int nlmax = this->inlmax / nat; - - std::vector orbital_pdm_shell_vector; - - for (int nl = 0; nl < nlmax; ++nl) - { - std::vector kiammv; - for (int iks = 0; iks < nks; ++iks) - { - std::vector iammv; - for (int hl = 0; hl < 1; ++hl) - { - std::vector ammv; - for (int iat = 0; iat < nat; ++iat) - { - int inl = iat * nlmax + nl; - int nm = 2 * this->inl_l[inl] + 1; - std::vector mmv; - - for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d - { - for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d - { - mmv.push_back( - this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]); - } - } - torch::Tensor mm - = torch::tensor( - mmv, - torch::TensorOptions().dtype(torch::kFloat64)) - .reshape({nm, nm}); // nm*nm - ammv.push_back(mm); - } - torch::Tensor amm = torch::stack(ammv, 0); - iammv.push_back(amm); - } - torch::Tensor iamm = torch::stack(iammv, 0); // inl*nm*nm - kiammv.push_back(iamm); - } - torch::Tensor kiamm = torch::stack(kiammv, 0); - orbital_pdm_shell_vector.push_back(kiamm); - } - - assert(orbital_pdm_shell_vector.size() == nlmax); - - // einsum for each nl: - std::vector orbital_precalc_vector; - for (int nl = 0; nl < nlmax; ++nl) - { - orbital_precalc_vector.push_back( - at::einsum("kiamn, avmn->kiav", - {orbital_pdm_shell_vector[nl], this->gevdm_vector[nl]})); - } - this->orbital_precalc_tensor = torch::cat(orbital_precalc_vector, -1); - - this->del_orbital_pdm_shell(nks); - ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc_k"); - return; -} - -#endif diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp index 010da2073d..c3d1abe61f 100644 --- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp +++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp @@ -126,11 +126,11 @@ void test_deepks::check_gdmx() this->ld.init_gdmx(ucell.nat); if (PARAM.sys.gamma_only_local) { - this->ld.cal_gdmx(dm_new[0], ucell, ORB, Test_Deepks::GridD, 0); + this->ld.cal_gdmx(dm_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0); } else { - this->ld.cal_gdmx_k(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0); + this->ld.cal_gdmx(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0); } this->ld.check_gdmx(ucell.nat); diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp index 0c91180d9a..25eebd7490 100644 --- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp +++ b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp @@ -18,8 +18,11 @@ // calculates v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell; // v_delta_pdm_shell[nks,nlocal,nlocal,Inl,nm*nm] = overlap * overlap; // for deepks_v_delta = 1 +template void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, const int nat, + const int nks, + const std::vector> &kvec_d, const UnitCell &ucell, const LCAO_Orbitals &orb, Grid_Driver &GridD) @@ -30,7 +33,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, this->cal_gvdm(nat); const double Rcut_Alpha = orb.Alpha[0].getRcut(); - this->init_v_delta_pdm_shell(1,nlocal); // 1 for gamma-only + this->init_v_delta_pdm_shell(nks,nlocal); for (int T0 = 0; T0 < ucell.ntype; T0++) { @@ -46,6 +49,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, { const int T1 = GridD.getType(ad1); const int I1 = GridD.getNatom(ad1); + const int ibt1 = ucell.itia2iat(T1, I1); const int start1 = ucell.itiaiw2iwt(T1, I1, 0); const ModuleBase::Vector3 tau1 = GridD.getAdjacentTau(ad1); const Atom* atom1 = &ucell.atoms[T1]; @@ -57,11 +61,27 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, { continue; } + + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, + GridD.getBox(ad1).y, + GridD.getBox(ad1).z); + + key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); + + if constexpr (std::is_same>::value) + { + if (this->nlm_save_k[iat].find(key_1) + == this->nlm_save_k[iat].end()) + { + continue; + } + } for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) { const int T2 = GridD.getType(ad2); const int I2 = GridD.getNatom(ad2); + const int ibt2 = ucell.itia2iat(T2, I2); const int start2 = ucell.itiaiw2iwt(T2, I2, 0); const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); const Atom* atom2 = &ucell.atoms[T2]; @@ -75,43 +95,83 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, continue; } + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, + GridD.getBox(ad2).y, + GridD.getBox(ad2).z); + key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); + + if constexpr (std::is_same>::value) + { + if (this->nlm_save_k[iat].find(key_2) + == this->nlm_save_k[iat].end()) + { + continue; + } + } + for (int iw1=0; iw1global2local_row(iw1_all); - if(iw1_local < 0) {continue; -} + if(iw1_local < 0) {continue;} const int iw1_0 = iw1/PARAM.globalv.npol; for (int iw2=0; iw2global2local_col(iw2_all); - if(iw2_local < 0) {continue; -} + if(iw2_local < 0) {continue;} const int iw2_0 = iw2/PARAM.globalv.npol; - std::vector nlm1 = this->nlm_save[iat][ad1][iw1][0]; - std::vector nlm2 = this->nlm_save[iat][ad2][iw2][0]; + std::vector nlm1; + std::vector nlm2; + if constexpr (std::is_same::value) + { + nlm1 = this->nlm_save[iat][ad1][iw1][0]; + nlm2 = this->nlm_save[iat][ad2][iw2][0]; + } + else + { + nlm1 = this->nlm_save_k[iat][key_1][iw1][0]; + nlm2 = this->nlm_save_k[iat][key_2][iw2][0]; + } assert(nlm1.size()==nlm2.size()); - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + for (int ik = 0; ik < nks; ik++) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + int ib=0; + std::complex kphase = std::complex(1.0, 0.0); + if constexpr (std::is_same>::value) { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1(cosp, sinp); + } + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + { + for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) { - for (int m2=0; m2inl_index[T0](I0, L0, N0); + const int nm = 2*L0+1; + + for (int m1=0; m1::value) + { + this->v_delta_pdm_shell[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]; + } + else + { + this->v_delta_pdm_shell_complex[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]*kphase; + } + } } + ib+=nm; } - ib+=nm; - } - } + } + } //ik }//iw2 }//iw1 }//ad2 @@ -121,13 +181,23 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, } #ifdef __MPI const int mn_size=(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1); - for(int inl = 0; inl < this->inlmax; inl++) + for (int ik = 0; ik < nks; ik++) { - for(int mu = 0; mu < nlocal ; mu++) + for(int inl = 0; inl < this->inlmax; inl++) { - for(int nu=0; nu< nlocal ; nu++) + for(int mu = 0; mu < nlocal ; mu++) { - Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[0][mu][nu][inl],mn_size); + for(int nu=0; nu< nlocal ; nu++) + { + if constexpr (std::is_same::value) + { + Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[ik][mu][nu][inl],mn_size); + } + else + { + Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl],mn_size); + } + } } } } @@ -141,7 +211,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, for(int nl = 0; nl < nlmax; ++nl) { std::vector kuuammv; - for(int iks = 0; iks < 1; ++iks) + for(int iks = 0; iks < nks; ++iks) { std::vector uuammv; for(int mu = 0; mu < nlocal; ++mu) @@ -154,17 +224,32 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, { int inl = iat*nlmax+nl; int nm = 2*this->inl_l[inl]+1; - std::vector mmv; + std::vector mmv; for (int m1=0; m1v_delta_pdm_shell[iks][mu][nu][inl][m1*nm+m2]); + if constexpr (std::is_same::value) + { + mmv.push_back(this->v_delta_pdm_shell[iks][mu][nu][inl][m1*nm+m2]); + } + else + { + mmv.push_back(this->v_delta_pdm_shell_complex[iks][mu][nu][inl][m1*nm+m2]); + } } } - torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64) ).reshape({nm, nm}); //nm*nm - ammv.push_back(mm); + if constexpr (std::is_same::value) + { + torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64) ).reshape({nm, nm}); //nm*nm + ammv.push_back(mm); + } + else + { + torch::Tensor mm = torch::from_blob(mmv.data(), {nm, nm}, torch::TensorOptions().dtype(torch::kComplexDouble)).clone(); //nm*nm + ammv.push_back(mm); + } } torch::Tensor amm = torch::stack(ammv, 0); uammv.push_back(amm); @@ -184,12 +269,20 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, //einsum for each nl: std::vector v_delta_precalc_vector; for (int nl = 0; nlkxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]})); + { + if constexpr (std::is_same::value) + { + v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]})); + } + else + { + torch::Tensor gevdm_vector_complex = this->gevdm_vector[nl].to(torch::kComplexDouble); + v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex})); + } } this->v_delta_precalc_tensor = torch::cat(v_delta_precalc_vector, -1); - this->del_v_delta_pdm_shell(1,nlocal); + this->del_v_delta_pdm_shell(nks,nlocal); //check_v_delta_precalc(nlocal,nat); // timeval t_end; @@ -198,6 +291,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, return; } +template void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int nlocal) { std::ofstream ofs("v_delta_precalc.dat"); @@ -212,7 +306,15 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n { for(int p=0; pdes_per_atom; ++p) { - ofs<v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble()<<" "; + if constexpr (std::is_same::value) + { + ofs << this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble() << " " ; + } + else + { + auto tensor_value = this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p}); + ofs << std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()) << " " ; + } } } ofs << std::endl; @@ -222,4 +324,23 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n ofs.close(); } +template void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, + const int nat, + const int nks, + const std::vector> &kvec_d, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver &GridD); + +template void LCAO_Deepks::cal_v_delta_precalc>(const int nlocal, + const int nat, + const int nks, + const std::vector> &kvec_d, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver &GridD); + +template void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks, const int nlocal); +template void LCAO_Deepks::check_v_delta_precalc>(const int nat, const int nks, const int nlocal); + #endif diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp deleted file mode 100644 index 1a1d3b4006..0000000000 --- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp +++ /dev/null @@ -1,234 +0,0 @@ -#ifdef __DEEPKS - -#include "LCAO_deepks.h" -#include "LCAO_deepks_io.h" -#include "module_base/blas_connector.h" -#include "module_base/constants.h" -#include "module_base/libm/libm.h" -#include "module_base/parallel_reduce.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" -#include "module_parameter/parameter.h" - - -// calculates v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell; -// v_delta_pdm_shell[nks,nlocal,nlocal,Inl,nm*nm] = overlap * overlap; -// for deepks_v_delta = 1 -void LCAO_Deepks::cal_v_delta_precalc_k(const int nlocal, - const int nat, - const int nks, - const std::vector> &kvec_d, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - Grid_Driver &GridD) -{ - ModuleBase::TITLE("LCAO_Deepks", "calc_v_delta_precalc"); - // timeval t_start; - // gettimeofday(&t_start,NULL); - - this->cal_gvdm(nat); - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - this->init_v_delta_pdm_shell(nks,nlocal); // multi-k - - for (int T0 = 0; T0 < ucell.ntype; T0++) - { - Atom* atom0 = &ucell.atoms[T0]; - - for (int I0 =0; I0< atom0->na; I0++) - { - const int iat = ucell.itia2iat(T0,I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); - - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - if (dist1 >= Rcut_Alpha + Rcut_AO1) - { - continue; - } - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, - GridD.getBox(ad1).y, - GridD.getBox(ad1).z); - - key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); - - if (this->nlm_save_k[iat].find(key_1) - == this->nlm_save_k[iat].end()) - { - continue; - } - - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int ibt2 = ucell.itia2iat(T2, I2); - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist2 >= Rcut_Alpha + Rcut_AO2) - { - continue; - } - - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, - GridD.getBox(ad2).y, - GridD.getBox(ad2).z); - - key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); - - if (this->nlm_save_k[iat].find(key_2) - == this->nlm_save_k[iat].end()) - { - continue; - } - - for (int iw1=0; iw1global2local_row(iw1_all); - if(iw1_local < 0) {continue;} - const int iw1_0 = iw1/PARAM.globalv.npol; - for (int iw2=0; iw2global2local_col(iw2_all); - if(iw2_local < 0) {continue;} - const int iw2_0 = iw2/PARAM.globalv.npol; - // Should use nlm_save_k, to be modified here!!! - std::vector nlm1 = this->nlm_save_k[iat][key_1][iw1][0]; - std::vector nlm2 = this->nlm_save_k[iat][key_2][iw2][0]; - assert(nlm1.size()==nlm2.size()); - for (int ik = 0; ik < nks; ik++) - { - int ib=0; - const double arg = - (kvec_d[ik] * (dR1-dR2) ) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - const std::complex kphase = std::complex(cosp, sinp); - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1lmaxd + 1) * (2 * this->lmaxd + 1); - for(int ik = 0; ik < nks; ik++) - { - for(int inl = 0; inl < this->inlmax; inl++) - { - for(int mu = 0; mu < nlocal ; mu++) - { - for(int nu=0; nu< nlocal ; nu++) - { - Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl],mn_size); - } - } - } - } -#endif - // transfer v_delta_pdm_shell to v_delta_pdm_shell_vector - - int nlmax = this->inlmax/nat; - - std::vector v_delta_pdm_shell_vector; - for(int nl = 0; nl < nlmax; ++nl) - { - std::vector kuuammv; - for(int iks = 0; iks < nks; ++iks) - { - std::vector uuammv; - for(int mu = 0; mu < nlocal; ++mu) - { - std::vector uammv; - for(int nu =0 ; nu < nlocal; ++nu) - { - std::vector ammv; - for (int iat=0; iatinl_l[inl]+1; - std::vector> mmv; - - for (int m1=0; m1v_delta_pdm_shell_complex[iks][mu][nu][inl][m1*nm+m2]); - } - } - torch::Tensor mm = torch::from_blob(mmv.data(), {nm, nm}, torch::TensorOptions().dtype(torch::kComplexDouble)).clone(); //nm*nm - ammv.push_back(mm); - } - torch::Tensor amm = torch::stack(ammv, 0); - uammv.push_back(amm); - } - torch::Tensor uamm = torch::stack(uammv, 0); - uuammv.push_back(uamm); - } - torch::Tensor uuamm = torch::stack(uuammv, 0); - kuuammv.push_back(uuamm); - } - torch::Tensor kuuamm = torch::stack(kuuammv, 0); - v_delta_pdm_shell_vector.push_back(kuuamm); - } - - assert(v_delta_pdm_shell_vector.size() == nlmax); - - //einsum for each nl: - std::vector v_delta_precalc_vector; - for (int nl = 0; nlgevdm_vector[nl].to(torch::kComplexDouble); - v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex})); - } - - this->v_delta_precalc_tensor = torch::cat(v_delta_precalc_vector, -1); - this->del_v_delta_pdm_shell(nks,nlocal); - - //check_v_delta_precalc(nlocal,nat); - // timeval t_end; - // gettimeofday(&t_end,NULL); - // std::cout<<"calculate v_delta_precalc time:\t"<<(double)(t_end.tv_sec-t_start.tv_sec) + (double)(t_end.tv_usec-t_start.tv_usec)/1000000.0<