From 6a1b5f64db9187b5a2b3fadadc482e31a74932c5 Mon Sep 17 00:00:00 2001 From: William F Godoy Date: Mon, 24 Jul 2023 15:09:01 -0400 Subject: [PATCH 1/4] Implement SPOSetT template class Assess the effort to change this. Currently without consumers or tests. Concretize friend class declaration Define testing::getMyVars for SPOSetT Add FakeSPOT class Move SpinorSet to a templated class Refactor FreeOrbital class Base typed aliases on SPOSet on OrbitalSetTraits Add FullRealType in SPOSet and RotatedSPOs Add this in templated meta class Add explicit function instantiations for FreeOrbital Add templated class SHOSetT Signed-off-by: Steven Hahn Add PWRealOrbitalSetT template class Revert test_RotatedSPOs.cpp Revert test_RotatedSPOs.cpp Reorder specialized function definitions to appease OMP target compilation Add ConstantSPOSetT Refactor BsplineSet and SplineC2C Follow existing pattern for SplineC2C allowing for std::complex PWOrbitalSetT and PWBasisT Add FullRealType in SPOSet and RotatedSPOs Move generic definition after specialization add implicit implementations Fix some errors initial commit of templated PWOrbitalSetT that compiles cleanup templatize PWBasis as well, as it is a dependency remove inaccurate comment remove polluted commit Add LCAOrbitalSetT Add templated class LCAOrbitalSetWithCorrectionT Signed-off-by: Steven Hahn Add SPOSetBuilderT, SHOSetBuilderT and SoaCuspCorrectionT Fix PWOrbitalSet alias types Fix LCAOrbitalSetWithCorrectionT Reuse SPOSet types Signed-off-by: Steven Hahn Implement CompositeSPOSetT class Specialize functions in RotatedSPOsT Fix function signature Change order of definition Implement SPOSetBuilderFactoryT and most required builders Add missing implementation to SPOSetBuilderT Signed-off-by: Steven Hahn Add CI to WIP refactoring branch add SplineR2RT Empty-Commit Further template propagation to fix offload build Implement SplineC2RTOMPTarget template class add missing particlesetT Refactored everything needed for test_RotatedSPOsT Add new 
bits to RotatedSPOsT Bugfix: removed QMC_COMPLEX conditions where no longer needed Accommodate new refactored headers simd::dot and Spline classes Move memory reference inside PRAGMA Causes a memory corruption when defining internal pos Fix pragma typo with is_device_ptr Implement SPOSetT template class Assess the initial effort to refactor SPOSet into templates without consumers or tests. Concretize friend class declaration Define testing::getMyVars for SPOSetT Add FakeSPOT class Move SpinorSet to a templated class Refactor FreeOrbital class Base typed aliases on SPOSet on OrbitalSetTraits Add FullRealType in SPOSet and RotatedSPOs Add this in templated meta class Add explicit function instantiations for FreeOrbital Add templated class SHOSetT Add PWRealOrbitalSetT template class Revert test_RotatedSPOs.cpp Signed-off-by: Steven Hahn Revert test_RotatedSPOs.cpp PWOrbitalSetT and PWBasisT Add FullRealType in SPOSet and RotatedSPOs Move generic definition after specialization add implicit implementations Fix some errors initial commit of templated PWOrbitalSetT that compiles cleanup templatize PWBasis as well, as it is a dependency remove inaccurate comment remove polluted commit Start replacing legacy code with templated classes Fix ornl CI tests sulfur: Add missing SPOSet header in NiO tests nitrogen: Add missing memory header for std::unique_ptr clang-format-16 on new files Fix license file headers Fix typo with T1->T --- .github/workflows/ci-github-actions.yaml | 2 + src/Estimators/EstimatorManagerBase.h | 4 +- src/Estimators/EstimatorManagerCrowd.h | 3 +- src/Estimators/OperatorEstBase.h | 3 +- src/Estimators/ScalarEstimatorBase.h | 2 +- .../tests/test_EstimatorManagerCrowd.cpp | 2 +- .../tests/test_MagnetizationDensity.cpp | 2 +- src/Numerics/OneDimGridFactory.cpp | 9 +- src/Numerics/OneDimGridFactory.h | 6 +- src/Numerics/SoaCartesianTensor.h | 4 +- src/Numerics/SoaSphericalTensor.h | 3 + src/Particle/CMakeLists.txt | 30 +- src/Particle/DistanceTable.h | 352 +--- 
src/Particle/DistanceTableT.h | 402 ++++ src/Particle/DynamicCoordinates.h | 95 +- src/Particle/DynamicCoordinatesT.cpp | 39 + src/Particle/DynamicCoordinatesT.h | 123 ++ src/Particle/InitMolecularSystem.h | 43 +- ...larSystem.cpp => InitMolecularSystemT.cpp} | 135 +- src/Particle/InitMolecularSystemT.h | 69 + src/Particle/Lattice/CrystalLattice.h | 2 +- src/Particle/Lattice/LRBreakupParameters.h | 2 +- src/Particle/LongRange/KContainer.h | 76 +- .../{KContainer.cpp => KContainerT.cpp} | 134 +- src/Particle/LongRange/KContainerT.h | 106 + src/Particle/LongRange/StructFact.h | 105 +- .../{StructFact.cpp => StructFactT.cpp} | 61 +- src/Particle/LongRange/StructFactT.h | 135 ++ .../LongRange/tests/test_lrhandler.cpp | 1 + src/Particle/MCCoords.cpp | 51 - src/Particle/MCCoords.hpp | 46 +- src/Particle/MCCoordsT.cpp | 62 + src/Particle/MCCoordsT.hpp | 70 + src/Particle/MCWalkerConfiguration.h | 153 +- ...uration.cpp => MCWalkerConfigurationT.cpp} | 186 +- src/Particle/MCWalkerConfigurationT.h | 185 ++ src/Particle/PSdispatcher.cpp | 2 +- src/Particle/PSdispatcher.h | 1 + src/Particle/ParticleIO/LatticeIO.cpp | 210 ++ src/Particle/ParticleIO/LatticeIO.h | 13 + src/Particle/ParticleIO/XMLParticleIO.cpp | 399 ++++ src/Particle/ParticleIO/XMLParticleIO.h | 36 + src/Particle/ParticleSet.h | 675 +----- src/Particle/ParticleSetPool.h | 100 +- ...rticleSetPool.cpp => ParticleSetPoolT.cpp} | 89 +- src/Particle/ParticleSetPoolT.h | 124 ++ ...ParticleSet.BC.cpp => ParticleSetT.BC.cpp} | 4 +- .../{ParticleSet.cpp => ParticleSetT.cpp} | 644 ++++-- src/Particle/ParticleSetT.h | 763 +++++++ src/Particle/ParticleSetTraits.h | 95 + src/Particle/RealSpacePositions.h | 45 +- src/Particle/RealSpacePositionsOMPTarget.h | 231 +-- src/Particle/RealSpacePositionsT.h | 67 + src/Particle/RealSpacePositionsTOMPTarget.h | 264 +++ src/Particle/Reptile.h | 248 +-- src/Particle/ReptileT.h | 276 +++ src/Particle/SampleStack.h | 44 +- .../{SampleStack.cpp => SampleStackT.cpp} | 34 +- 
src/Particle/SampleStackT.h | 62 + src/Particle/SimulationCell.h | 34 +- ...SimulationCell.cpp => SimulationCellT.cpp} | 24 +- src/Particle/SimulationCellT.h | 55 + src/Particle/SoaDistanceTableAA.h | 190 +- src/Particle/SoaDistanceTableAAOMPTarget.h | 483 +---- src/Particle/SoaDistanceTableAAT.h | 218 ++ src/Particle/SoaDistanceTableAATOMPTarget.h | 532 +++++ src/Particle/SoaDistanceTableAB.h | 129 +- src/Particle/SoaDistanceTableABOMPTarget.h | 394 +--- src/Particle/SoaDistanceTableABT.h | 160 ++ src/Particle/SoaDistanceTableABTOMPTarget.h | 436 ++++ src/Particle/VirtualParticleSet.cpp | 271 --- src/Particle/VirtualParticleSet.h | 139 +- src/Particle/VirtualParticleSetT.cpp | 288 +++ src/Particle/VirtualParticleSetT.h | 166 ++ src/Particle/WalkerConfigurations.h | 162 +- ...urations.cpp => WalkerConfigurationsT.cpp} | 52 +- src/Particle/WalkerConfigurationsT.h | 193 ++ src/Particle/createDistanceTable.h | 57 +- src/Particle/createDistanceTableAA.cpp | 98 - .../createDistanceTableAAOMPTarget.cpp | 98 - src/Particle/createDistanceTableAB.cpp | 99 - .../createDistanceTableABOMPTarget.cpp | 101 - src/Particle/createDistanceTableT.cpp | 230 +++ src/Particle/createDistanceTableT.h | 83 + .../createDistanceTableTOMPTarget.cpp | 228 +++ src/Particle/tests/CMakeLists.txt | 3 +- .../tests/test_SoaDistanceTableAA.cpp | 2 +- src/Particle/tests/test_particle_pool.cpp | 2 +- src/QMCDrivers/DMC/DMCFactoryNew.h | 2 +- src/QMCDrivers/MCPopulation.h | 10 +- src/QMCDrivers/QMCDriver.h | 3 +- src/QMCDrivers/QMCDriverFactory.h | 2 +- src/QMCDrivers/RMC/RMCFactory.h | 2 +- src/QMCDrivers/VMC/VMCBatched.h | 2 +- src/QMCDrivers/VMC/VMCFactory.h | 2 +- src/QMCDrivers/VMC/VMCFactoryNew.h | 4 +- src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h | 1 + .../WFOpt/QMCFixedSampleLinearOptimize.cpp | 1 + .../QMCFixedSampleLinearOptimizeBatched.cpp | 2 + src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h | 2 +- src/QMCDrivers/WalkerElementsRef.h | 42 +- src/QMCDrivers/tests/SetupPools.h | 3 +- 
src/QMCDrivers/tests/WalkerConsumer.h | 68 +- src/QMCDrivers/tests/test_Crowd.cpp | 2 +- src/QMCDrivers/tests/test_DMCBatched.cpp | 1 + src/QMCDrivers/tests/test_SFNBranch.cpp | 2 +- src/QMCHamiltonians/ECPotentialBuilder.h | 2 +- src/QMCHamiltonians/HamiltonianPool.h | 7 +- src/QMCHamiltonians/NonLocalECPotential.h | 1 + src/QMCHamiltonians/OperatorBase.h | 3 +- src/QMCHamiltonians/QMCHamiltonian.h | 2 +- src/QMCWaveFunctions/BasisSetBase.h | 40 +- .../BsplineFactory/ApplyPhaseC2R.hpp | 1 + .../BsplineFactory/BsplineReaderBase.h | 173 +- ...eReaderBase.cpp => BsplineReaderBaseT.cpp} | 74 +- .../BsplineFactory/BsplineReaderBaseT.h | 203 ++ .../BsplineFactory/BsplineSet.h | 224 +- .../BsplineFactory/BsplineSetT.h | 256 +++ .../BsplineFactory/EinsplineSetBuilder.h | 260 +-- .../EinsplineSpinorSetBuilder.h | 25 +- .../BsplineFactory/HybridRepCenterOrbitals.h | 742 +------ ...itals.cpp => HybridRepCenterOrbitalsT.cpp} | 12 +- .../BsplineFactory/HybridRepCenterOrbitalsT.h | 744 +++++++ .../BsplineFactory/HybridRepCplx.h | 221 +- .../BsplineFactory/HybridRepCplxT.h | 267 +++ .../BsplineFactory/HybridRepReal.h | 233 +-- .../BsplineFactory/HybridRepRealT.h | 277 +++ ...idRepSetReader.h => HybridRepSetReaderT.h} | 101 +- .../BsplineFactory/SplineC2C.h | 207 +- .../BsplineFactory/SplineC2COMPTarget.h | 293 +-- ...COMPTarget.cpp => SplineC2COMPTargetT.cpp} | 302 +-- .../BsplineFactory/SplineC2COMPTargetT.h | 329 +++ .../{SplineC2C.cpp => SplineC2CT.cpp} | 274 +-- .../BsplineFactory/SplineC2CT.h | 235 +++ .../BsplineFactory/SplineC2R.h | 191 +- .../BsplineFactory/SplineC2ROMPTarget.h | 299 +-- ...ROMPTarget.cpp => SplineC2ROMPTargetT.cpp} | 341 ++-- .../BsplineFactory/SplineC2ROMPTargetT.h | 334 +++ .../{SplineC2R.cpp => SplineC2RT.cpp} | 257 +-- .../BsplineFactory/SplineC2RT.h | 221 ++ .../BsplineFactory/SplineR2R.h | 202 +- .../{SplineR2R.cpp => SplineR2RT.cpp} | 271 +-- .../BsplineFactory/SplineR2RT.h | 228 +++ .../BsplineFactory/SplineSetReader.h | 266 +-- 
.../BsplineFactory/SplineSetReaderT.h | 295 +++ .../BsplineFactory/createBsplineReader.h | 38 +- .../BsplineFactory/createBsplineReaderT.cpp | 311 +++ .../BsplineFactory/createBsplineReaderT.h | 59 + .../BsplineFactory/createComplexDouble.cpp | 22 +- .../BsplineFactory/createComplexSingle.cpp | 22 +- .../BsplineFactory/createRealDouble.cpp | 10 +- .../BsplineFactory/createRealSingle.cpp | 16 +- src/QMCWaveFunctions/CMakeLists.txt | 92 +- src/QMCWaveFunctions/CompositeSPOSet.cpp | 197 -- src/QMCWaveFunctions/CompositeSPOSet.h | 86 +- src/QMCWaveFunctions/CompositeSPOSetT.cpp | 236 +++ src/QMCWaveFunctions/CompositeSPOSetT.h | 119 ++ src/QMCWaveFunctions/EinsplineSetBuilderT.cpp | 1816 +++++++++++++++++ src/QMCWaveFunctions/EinsplineSetBuilderT.h | 301 +++ ...der.cpp => EinsplineSpinorSetBuilderT.cpp} | 118 +- .../EinsplineSpinorSetBuilderT.h | 54 + .../ElectronGas/FreeOrbital.cpp | 264 --- .../ElectronGas/FreeOrbital.h | 53 +- .../ElectronGas/FreeOrbitalBuilder.h | 34 +- ...talBuilder.cpp => FreeOrbitalBuilderT.cpp} | 42 +- .../ElectronGas/FreeOrbitalBuilderT.h | 41 + .../ElectronGas/FreeOrbitalT.cpp | 701 +++++++ .../ElectronGas/FreeOrbitalT.h | 88 + src/QMCWaveFunctions/ExampleHeComponent.h | 2 +- .../Fermion/Backflow_ee_kSpace.h | 4 - .../Fermion/SlaterDetBuilder.h | 6 +- .../HarmonicOscillator/SHOSet.h | 126 +- .../HarmonicOscillator/SHOSetBuilder.h | 42 +- .../{SHOSetBuilder.cpp => SHOSetBuilderT.cpp} | 131 +- .../HarmonicOscillator/SHOSetBuilderT.h | 62 + .../{SHOSet.cpp => SHOSetT.cpp} | 258 +-- .../HarmonicOscillator/SHOSetT.h | 154 ++ .../Jastrow/CountingGaussian.h | 3 +- .../Jastrow/CountingGaussianRegion.h | 1 - .../Jastrow/CountingJastrow.h | 1 + .../Jastrow/eeI_JastrowBuilder.h | 3 +- .../Jastrow/kSpaceJastrowBuilder.h | 4 +- src/QMCWaveFunctions/LCAO/AOBasisBuilder.h | 54 +- ...AOBasisBuilder.cpp => AOBasisBuilderT.cpp} | 354 ++-- src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h | 75 + src/QMCWaveFunctions/LCAO/CuspCorrection.h | 65 +- 
.../LCAO/CuspCorrectionConstruction.h | 268 +-- ...on.cpp => CuspCorrectionConstructionT.cpp} | 705 ++++--- .../LCAO/CuspCorrectionConstructionT.h | 328 +++ src/QMCWaveFunctions/LCAO/CuspCorrectionT.h | 98 + src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h | 45 +- ...inorBuilder.cpp => LCAOSpinorBuilderT.cpp} | 108 +- .../LCAO/LCAOSpinorBuilderT.h | 68 + src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h | 102 +- ...italBuilder.cpp => LCAOrbitalBuilderT.cpp} | 622 +++--- .../LCAO/LCAOrbitalBuilderT.h | 131 ++ src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp | 991 --------- src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h | 303 +-- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp | 1022 ++++++++++ src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h | 372 ++++ .../LCAO/LCAOrbitalSetWithCorrection.cpp | 65 - .../LCAO/LCAOrbitalSetWithCorrection.h | 51 +- .../LCAO/LCAOrbitalSetWithCorrectionT.cpp | 74 + .../LCAO/LCAOrbitalSetWithCorrectionT.h | 74 + .../LCAO/MultiFunctorAdapter.h | 58 + .../LCAO/RadialOrbitalSetBuilder.h | 2 +- src/QMCWaveFunctions/LCAO/SoaAtomicBasisSet.h | 1097 +--------- .../LCAO/SoaAtomicBasisSetT.h | 1149 +++++++++++ ...pCorrection.cpp => SoaCuspCorrectionT.cpp} | 59 +- .../LCAO/SoaCuspCorrectionT.h | 117 ++ .../LCAO/SoaLocalizedBasisSet.cpp | 24 +- .../LCAO/SoaLocalizedBasisSet.h | 26 +- src/QMCWaveFunctions/OptimizableFunctorBase.h | 4 +- src/QMCWaveFunctions/OptimizableObject.h | 94 +- src/QMCWaveFunctions/OptimizableObjectT.h | 123 ++ src/QMCWaveFunctions/OrbitalSetTraits.h | 2 + src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp | 197 ++ src/QMCWaveFunctions/PlaneWave/PWBasisT.h | 349 ++++ .../PlaneWave/PWOrbitalSetT.cpp | 150 ++ .../PlaneWave/PWOrbitalSetT.h | 125 ++ .../PlaneWave/PWRealOrbitalSetT.cpp | 165 ++ .../PlaneWave/PWRealOrbitalSetT.h | 143 ++ src/QMCWaveFunctions/RotatedSPOs.h | 438 +--- .../{RotatedSPOs.cpp => RotatedSPOsT.cpp} | 1078 +++++----- src/QMCWaveFunctions/RotatedSPOsT.h | 483 +++++ src/QMCWaveFunctions/SPOSet.cpp | 406 ---- src/QMCWaveFunctions/SPOSet.h | 
537 +---- src/QMCWaveFunctions/SPOSetBuilder.cpp | 182 -- src/QMCWaveFunctions/SPOSetBuilder.h | 65 +- src/QMCWaveFunctions/SPOSetBuilderFactory.h | 52 +- ...rFactory.cpp => SPOSetBuilderFactoryT.cpp} | 159 +- src/QMCWaveFunctions/SPOSetBuilderFactoryT.h | 73 + src/QMCWaveFunctions/SPOSetBuilderT.cpp | 449 ++++ src/QMCWaveFunctions/SPOSetBuilderT.h | 94 + src/QMCWaveFunctions/SPOSetInfo.h | 3 +- src/QMCWaveFunctions/SPOSetScannerT.h | 254 +++ src/QMCWaveFunctions/SPOSetT.cpp | 440 ++++ src/QMCWaveFunctions/SPOSetT.h | 612 ++++++ src/QMCWaveFunctions/SpinorSet.h | 198 +- .../{SpinorSet.cpp => SpinorSetT.cpp} | 364 ++-- src/QMCWaveFunctions/SpinorSetT.h | 242 +++ src/QMCWaveFunctions/TrialWaveFunction.h | 1 + src/QMCWaveFunctions/VariableSet.h | 313 +-- .../{VariableSet.cpp => VariableSetT.cpp} | 80 +- src/QMCWaveFunctions/VariableSetT.h | 336 +++ src/QMCWaveFunctions/WaveFunctionPool.h | 5 +- src/QMCWaveFunctions/tests/CMakeLists.txt | 13 +- src/QMCWaveFunctions/tests/ConstantSPOSet.cpp | 100 - src/QMCWaveFunctions/tests/ConstantSPOSet.h | 69 +- .../tests/ConstantSPOSetT.cpp | 124 ++ src/QMCWaveFunctions/tests/ConstantSPOSetT.h | 96 + src/QMCWaveFunctions/tests/FakeSPOT.cpp | 159 ++ src/QMCWaveFunctions/tests/FakeSPOT.h | 61 + .../tests/test_ConstantSPOSetT.cpp | 136 ++ .../tests/test_DiracDeterminantBatched.cpp | 4 +- .../tests/test_LCAO_diamondC_2x1x1.cpp | 8 +- src/QMCWaveFunctions/tests/test_MO.cpp | 3 +- src/QMCWaveFunctions/tests/test_MO_spinor.cpp | 53 +- .../tests/test_RotatedSPOs.cpp | 868 -------- .../tests/test_RotatedSPOsT.cpp | 909 +++++++++ .../tests/test_TrialWaveFunction_He.cpp | 1 + .../tests/test_cartesian_ao.cpp | 1 + src/QMCWaveFunctions/tests/test_einset.cpp | 1 + .../tests/test_einset_NiO_a16.cpp | 1 + .../tests/test_einset_spinor.cpp | 6 +- .../tests/test_example_he.cpp | 1 + src/QMCWaveFunctions/tests/test_hybridrep.cpp | 1 + .../tests/test_pyscf_complex_MO.cpp | 2 +- .../tests/test_soa_cusp_corr.cpp | 34 +- 
.../tests/test_spline_applyrotation.cpp | 1 + src/mpi/mpi_datatype.h | 2 + src/spline/test_bspline.h | 1 - src/type_traits/complex_help.hpp | 22 + 273 files changed, 27367 insertions(+), 18170 deletions(-) create mode 100644 src/Particle/DistanceTableT.h create mode 100644 src/Particle/DynamicCoordinatesT.cpp create mode 100644 src/Particle/DynamicCoordinatesT.h rename src/Particle/{InitMolecularSystem.cpp => InitMolecularSystemT.cpp} (70%) create mode 100644 src/Particle/InitMolecularSystemT.h rename src/Particle/LongRange/{KContainer.cpp => KContainerT.cpp} (68%) create mode 100644 src/Particle/LongRange/KContainerT.h rename src/Particle/LongRange/{StructFact.cpp => StructFactT.cpp} (82%) create mode 100644 src/Particle/LongRange/StructFactT.h delete mode 100644 src/Particle/MCCoords.cpp create mode 100644 src/Particle/MCCoordsT.cpp create mode 100644 src/Particle/MCCoordsT.hpp rename src/Particle/{MCWalkerConfiguration.cpp => MCWalkerConfigurationT.cpp} (52%) create mode 100644 src/Particle/MCWalkerConfigurationT.h rename src/Particle/{ParticleSetPool.cpp => ParticleSetPoolT.cpp} (74%) create mode 100644 src/Particle/ParticleSetPoolT.h rename src/Particle/{ParticleSet.BC.cpp => ParticleSetT.BC.cpp} (98%) rename src/Particle/{ParticleSet.cpp => ParticleSetT.cpp} (56%) create mode 100644 src/Particle/ParticleSetT.h create mode 100644 src/Particle/ParticleSetTraits.h create mode 100644 src/Particle/RealSpacePositionsT.h create mode 100644 src/Particle/RealSpacePositionsTOMPTarget.h create mode 100644 src/Particle/ReptileT.h rename src/Particle/{SampleStack.cpp => SampleStackT.cpp} (61%) create mode 100644 src/Particle/SampleStackT.h rename src/Particle/{SimulationCell.cpp => SimulationCellT.cpp} (76%) create mode 100644 src/Particle/SimulationCellT.h create mode 100644 src/Particle/SoaDistanceTableAAT.h create mode 100644 src/Particle/SoaDistanceTableAATOMPTarget.h create mode 100644 src/Particle/SoaDistanceTableABT.h create mode 100644 
src/Particle/SoaDistanceTableABTOMPTarget.h delete mode 100644 src/Particle/VirtualParticleSet.cpp create mode 100644 src/Particle/VirtualParticleSetT.cpp create mode 100644 src/Particle/VirtualParticleSetT.h rename src/Particle/{WalkerConfigurations.cpp => WalkerConfigurationsT.cpp} (70%) create mode 100644 src/Particle/WalkerConfigurationsT.h delete mode 100644 src/Particle/createDistanceTableAA.cpp delete mode 100644 src/Particle/createDistanceTableAAOMPTarget.cpp delete mode 100644 src/Particle/createDistanceTableAB.cpp delete mode 100644 src/Particle/createDistanceTableABOMPTarget.cpp create mode 100644 src/Particle/createDistanceTableT.cpp create mode 100644 src/Particle/createDistanceTableT.h create mode 100644 src/Particle/createDistanceTableTOMPTarget.cpp rename src/QMCWaveFunctions/BsplineFactory/{BsplineReaderBase.cpp => BsplineReaderBaseT.cpp} (77%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h rename src/QMCWaveFunctions/BsplineFactory/{HybridRepCenterOrbitals.cpp => HybridRepCenterOrbitalsT.cpp} (62%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h rename src/QMCWaveFunctions/BsplineFactory/{HybridRepSetReader.h => HybridRepSetReaderT.h} (88%) rename src/QMCWaveFunctions/BsplineFactory/{SplineC2COMPTarget.cpp => SplineC2COMPTargetT.cpp} (86%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h rename src/QMCWaveFunctions/BsplineFactory/{SplineC2C.cpp => SplineC2CT.cpp} (83%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h rename src/QMCWaveFunctions/BsplineFactory/{SplineC2ROMPTarget.cpp => SplineC2ROMPTargetT.cpp} (88%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h rename 
src/QMCWaveFunctions/BsplineFactory/{SplineC2R.cpp => SplineC2RT.cpp} (89%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h rename src/QMCWaveFunctions/BsplineFactory/{SplineR2R.cpp => SplineR2RT.cpp} (71%) create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h create mode 100644 src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp create mode 100644 src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h delete mode 100644 src/QMCWaveFunctions/CompositeSPOSet.cpp create mode 100644 src/QMCWaveFunctions/CompositeSPOSetT.cpp create mode 100644 src/QMCWaveFunctions/CompositeSPOSetT.h create mode 100644 src/QMCWaveFunctions/EinsplineSetBuilderT.cpp create mode 100644 src/QMCWaveFunctions/EinsplineSetBuilderT.h rename src/QMCWaveFunctions/{BsplineFactory/EinsplineSpinorSetBuilder.cpp => EinsplineSpinorSetBuilderT.cpp} (62%) create mode 100644 src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h delete mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp rename src/QMCWaveFunctions/ElectronGas/{FreeOrbitalBuilder.cpp => FreeOrbitalBuilderT.cpp} (54%) create mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h create mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp create mode 100644 src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h rename src/QMCWaveFunctions/HarmonicOscillator/{SHOSetBuilder.cpp => SHOSetBuilderT.cpp} (55%) create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h rename src/QMCWaveFunctions/HarmonicOscillator/{SHOSet.cpp => SHOSetT.cpp} (66%) create mode 100644 src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h rename src/QMCWaveFunctions/LCAO/{AOBasisBuilder.cpp => AOBasisBuilderT.cpp} (65%) create mode 100644 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h rename src/QMCWaveFunctions/LCAO/{CuspCorrectionConstruction.cpp => CuspCorrectionConstructionT.cpp} (65%) create mode 100644 
src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h create mode 100644 src/QMCWaveFunctions/LCAO/CuspCorrectionT.h rename src/QMCWaveFunctions/LCAO/{LCAOSpinorBuilder.cpp => LCAOSpinorBuilderT.cpp} (55%) create mode 100644 src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h rename src/QMCWaveFunctions/LCAO/{LCAOrbitalBuilder.cpp => LCAOrbitalBuilderT.cpp} (60%) create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h delete mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h delete mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp create mode 100644 src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h create mode 100644 src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h rename src/QMCWaveFunctions/LCAO/{SoaCuspCorrection.cpp => SoaCuspCorrectionT.cpp} (71%) create mode 100644 src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h create mode 100644 src/QMCWaveFunctions/OptimizableObjectT.h create mode 100644 src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp create mode 100644 src/QMCWaveFunctions/PlaneWave/PWBasisT.h create mode 100644 src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp create mode 100644 src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h create mode 100644 src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp create mode 100644 src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h rename src/QMCWaveFunctions/{RotatedSPOs.cpp => RotatedSPOsT.cpp} (54%) create mode 100644 src/QMCWaveFunctions/RotatedSPOsT.h delete mode 100644 src/QMCWaveFunctions/SPOSet.cpp delete mode 100644 src/QMCWaveFunctions/SPOSetBuilder.cpp rename src/QMCWaveFunctions/{SPOSetBuilderFactory.cpp => SPOSetBuilderFactoryT.cpp} (57%) create mode 100644 src/QMCWaveFunctions/SPOSetBuilderFactoryT.h create mode 100644 src/QMCWaveFunctions/SPOSetBuilderT.cpp create mode 100644 
src/QMCWaveFunctions/SPOSetBuilderT.h create mode 100644 src/QMCWaveFunctions/SPOSetScannerT.h create mode 100644 src/QMCWaveFunctions/SPOSetT.cpp create mode 100644 src/QMCWaveFunctions/SPOSetT.h rename src/QMCWaveFunctions/{SpinorSet.cpp => SpinorSetT.cpp} (55%) create mode 100644 src/QMCWaveFunctions/SpinorSetT.h rename src/QMCWaveFunctions/{VariableSet.cpp => VariableSetT.cpp} (81%) create mode 100644 src/QMCWaveFunctions/VariableSetT.h delete mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSet.cpp create mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp create mode 100644 src/QMCWaveFunctions/tests/ConstantSPOSetT.h create mode 100644 src/QMCWaveFunctions/tests/FakeSPOT.cpp create mode 100644 src/QMCWaveFunctions/tests/FakeSPOT.h create mode 100644 src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp delete mode 100644 src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp create mode 100644 src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml index bdd90a687c..1665274db4 100644 --- a/.github/workflows/ci-github-actions.yaml +++ b/.github/workflows/ci-github-actions.yaml @@ -5,10 +5,12 @@ on: branches: - develop - main + - ref-add-SPOSetT pull_request: branches: - develop - main + - ref-add-SPOSetT jobs: linux: diff --git a/src/Estimators/EstimatorManagerBase.h b/src/Estimators/EstimatorManagerBase.h index 565ff60c37..57f43bc9a1 100644 --- a/src/Estimators/EstimatorManagerBase.h +++ b/src/Estimators/EstimatorManagerBase.h @@ -30,10 +30,10 @@ #include "OhmmsPETE/OhmmsVector.h" #include "io/hdf/hdf_archive.h" #include +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { -class MCWalkerConfiguration; class QMCHamiltonian; class CollectablesEstimator; @@ -52,7 +52,7 @@ class EstimatorManagerBase using EstimatorType = ScalarEstimatorBase; using BufferType = std::vector; - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; 
///default constructor EstimatorManagerBase(Communicate* c = 0); diff --git a/src/Estimators/EstimatorManagerCrowd.h b/src/Estimators/EstimatorManagerCrowd.h index 3ca7eaf9eb..c92e8e6b3d 100644 --- a/src/Estimators/EstimatorManagerCrowd.h +++ b/src/Estimators/EstimatorManagerCrowd.h @@ -22,6 +22,7 @@ #include "Estimators/EstimatorManagerNew.h" #include "Particle/Walker.h" #include "OhmmsPETE/OhmmsVector.h" +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { @@ -38,7 +39,7 @@ class QMCHamiltonian; class EstimatorManagerCrowd { public: - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using RealType = EstimatorManagerNew::RealType; using FullPrecRealType = EstimatorManagerNew::FullPrecRealType; diff --git a/src/Estimators/OperatorEstBase.h b/src/Estimators/OperatorEstBase.h index fe107ea6ce..9ecca53c89 100644 --- a/src/Estimators/OperatorEstBase.h +++ b/src/Estimators/OperatorEstBase.h @@ -23,6 +23,7 @@ #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "type_traits/DataLocality.h" #include "hdf/hdf_archive.h" +#include "Particle/MCWalkerConfiguration.h" #include namespace qmcplusplus @@ -41,7 +42,7 @@ class OperatorEstBase public: using QMCT = QMCTraits; using FullPrecRealType = QMCT::FullPrecRealType; - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using Data = std::vector; diff --git a/src/Estimators/ScalarEstimatorBase.h b/src/Estimators/ScalarEstimatorBase.h index 9ccaae2dc7..c848aee332 100644 --- a/src/Estimators/ScalarEstimatorBase.h +++ b/src/Estimators/ScalarEstimatorBase.h @@ -42,7 +42,7 @@ struct ScalarEstimatorBase using RealType = QMCTraits::FullPrecRealType; using accumulator_type = accumulator_set; using Walker_t = MCWalkerConfiguration::Walker_t; - using MCPWalker = Walker; + using MCPWalker = Walker_t; using WalkerIterator = MCWalkerConfiguration::const_iterator; using RecordListType = RecordNamedProperty; diff --git 
a/src/Estimators/tests/test_EstimatorManagerCrowd.cpp b/src/Estimators/tests/test_EstimatorManagerCrowd.cpp index d3f8d42a73..2ce592a0d0 100644 --- a/src/Estimators/tests/test_EstimatorManagerCrowd.cpp +++ b/src/Estimators/tests/test_EstimatorManagerCrowd.cpp @@ -105,7 +105,7 @@ TEST_CASE("EstimatorManagerCrowd PerParticleHamiltonianLogger integration", "[es EstimatorManagerCrowd emc(emn); - using MCPWalker = Walker; + using MCPWalker = EstimatorManagerCrowd::MCPWalker; std::vector walkers(num_walkers, MCPWalker(pset.getTotalNum())); diff --git a/src/Estimators/tests/test_MagnetizationDensity.cpp b/src/Estimators/tests/test_MagnetizationDensity.cpp index 68d97e8597..7bc55db1cd 100644 --- a/src/Estimators/tests/test_MagnetizationDensity.cpp +++ b/src/Estimators/tests/test_MagnetizationDensity.cpp @@ -289,7 +289,7 @@ TEST_CASE("MagnetizationDensity::IntegrationTest", "[estimators]") using GradVector = Vector; using ValueMatrix = Matrix; using PropertySetType = OperatorBase::PropertySetType; - using MCPWalker = Walker; + using MCPWalker = Walker, LatticeParticleTraits>; using Data = MagnetizationDensity::Data; using GradMatrix = Matrix; using namespace testing; diff --git a/src/Numerics/OneDimGridFactory.cpp b/src/Numerics/OneDimGridFactory.cpp index 16a17ec9b3..bc90a9f505 100644 --- a/src/Numerics/OneDimGridFactory.cpp +++ b/src/Numerics/OneDimGridFactory.cpp @@ -13,19 +13,21 @@ #include "OneDimGridFactory.h" +#include "Configuration.h" #include "OhmmsData/AttributeSet.h" #include "Message/UniformCommunicateError.h" namespace qmcplusplus { -std::unique_ptr OneDimGridFactory::createGrid(xmlNodePtr cur) +template +std::unique_ptr::GridType> OneDimGridFactory::createGrid(xmlNodePtr cur) { std::unique_ptr agrid; RealType ri = 1e-5; RealType rf = 100.0; RealType ascale = -1.0e0; RealType astep = 1.25e-2; - IndexType npts = 1001; + QMCTraits::IndexType npts = 1001; std::string gridType("log"); std::string gridID("invalid"); OhmmsAttributeSet radAttrib; @@ -74,4 +76,7 @@ 
std::unique_ptr OneDimGridFactory::createGrid(xmlNo } return agrid; } + +template struct OneDimGridFactory; +template struct OneDimGridFactory; } // namespace qmcplusplus diff --git a/src/Numerics/OneDimGridFactory.h b/src/Numerics/OneDimGridFactory.h index 6365db25aa..d27b1fb904 100644 --- a/src/Numerics/OneDimGridFactory.h +++ b/src/Numerics/OneDimGridFactory.h @@ -14,15 +14,17 @@ #ifndef QMCPLUSPLUS_ONEDIMGRIDFACTORY_H #define QMCPLUSPLUS_ONEDIMGRIDFACTORY_H -#include "Configuration.h" #include "Numerics/OneDimGridFunctor.h" +#include "Numerics/LibxmlNumericIO.h" namespace qmcplusplus { /** Factory class using Singleton pattern */ -struct OneDimGridFactory : public QMCTraits +template +struct OneDimGridFactory { + using RealType = T; ///typedef of the one-dimensional grid using GridType = OneDimGridBase; diff --git a/src/Numerics/SoaCartesianTensor.h b/src/Numerics/SoaCartesianTensor.h index 497b9b0480..523e38058b 100644 --- a/src/Numerics/SoaCartesianTensor.h +++ b/src/Numerics/SoaCartesianTensor.h @@ -29,7 +29,7 @@ namespace qmcplusplus { /** CartesianTensor according to Gamess order - * @tparam T, value_type, e.g. double + * @tparam T, ValueType, e.g. 
double * * Original implementation Numerics/CartesianTensor.h * Modified to use SoA for cXYZ and used by SoaAtomicBasisSet @@ -40,12 +40,12 @@ template class SoaCartesianTensor { private: - using value_type = T; using ggg_type = TinyVector, 3>; using OffloadVector = Vector>; using OffloadArray2D = Array>; using OffloadArray3D = Array>; using OffloadArray4D = Array>; + using ValueType = T; ///maximum angular momentum int Lmax; diff --git a/src/Numerics/SoaSphericalTensor.h b/src/Numerics/SoaSphericalTensor.h index 8e7e2ffbb3..29210f821d 100644 --- a/src/Numerics/SoaSphericalTensor.h +++ b/src/Numerics/SoaSphericalTensor.h @@ -39,11 +39,14 @@ namespace qmcplusplus template class SoaSphericalTensor { + private: using OffloadVector = Vector>; using OffloadArray2D = Array>; using OffloadArray3D = Array>; using OffloadArray4D = Array>; + using ValueType = T; + ///maximum angular momentum for the center int Lmax; /// Normalization factors diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt index 1965106fc8..01d8d23cfa 100644 --- a/src/Particle/CMakeLists.txt +++ b/src/Particle/CMakeLists.txt @@ -13,24 +13,23 @@ # create libqmcparticle #################################### set(PARTICLE - InitMolecularSystem.cpp - SimulationCell.cpp - ParticleSetPool.cpp - ParticleSet.cpp + InitMolecularSystemT.cpp + SimulationCellT.cpp + ParticleSetPoolT.cpp + ParticleSetT.cpp PSdispatcher.cpp - VirtualParticleSet.cpp - ParticleSet.BC.cpp + VirtualParticleSetT.cpp DynamicCoordinatesBuilder.cpp - MCCoords.cpp - MCWalkerConfiguration.cpp - WalkerConfigurations.cpp + DynamicCoordinatesT.cpp + MCCoordsT.cpp + MCWalkerConfigurationT.cpp + WalkerConfigurationsT.cpp SpeciesSet.cpp - SampleStack.cpp - createDistanceTableAA.cpp - createDistanceTableAB.cpp + SampleStackT.cpp + createDistanceTableT.cpp HDFWalkerInputManager.cpp - LongRange/KContainer.cpp - LongRange/StructFact.cpp + LongRange/KContainerT.cpp + LongRange/StructFactT.cpp LongRange/LPQHIBasis.cpp 
LongRange/LPQHISRCoulombBasis.cpp LongRange/EwaldHandlerQuasi2D.cpp @@ -51,8 +50,7 @@ target_include_directories(qmcparticle PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(qmcparticle PRIVATE platform_cpu_LA) target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime) set(PARTICLE_OMPTARGET_SRCS - createDistanceTableAAOMPTarget.cpp - createDistanceTableABOMPTarget.cpp) + createDistanceTableTOMPTarget.cpp) add_library(qmcparticle_omptarget OBJECT ${PARTICLE_OMPTARGET_SRCS}) diff --git a/src/Particle/DistanceTable.h b/src/Particle/DistanceTable.h index 3175be4596..12cf0ac941 100644 --- a/src/Particle/DistanceTable.h +++ b/src/Particle/DistanceTable.h @@ -16,355 +16,13 @@ #ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPL_H #define QMCPLUSPLUS_DISTANCETABLEDATAIMPL_H -#include "Particle/ParticleSet.h" -#include -#include "OhmmsPETE/OhmmsVector.h" -#include "OhmmsPETE/OhmmsMatrix.h" -#include "CPU/SIMD/aligned_allocator.hpp" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "DTModes.h" +#include "Configuration.h" +#include "Particle/DistanceTableT.h" namespace qmcplusplus { -class ResourceCollection; - -/** @ingroup nnlist - * @brief Abstract class to manage operations on pair data between two ParticleSets. - * - * Each DistanceTable object is defined by Source and Target of ParticleSet types. - * This base class doesn't contain storage. It is intended for update/compute invoked by ParticleSet. - * Derived AA/AB classes handle the actual storage and data access. - */ -class DistanceTable -{ -public: - static constexpr unsigned DIM = OHMMS_DIM; - - using IndexType = QMCTraits::IndexType; - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - using DistRow = Vector>; - using DisplRow = VectorSoaContainer; - -protected: - // FIXME. once DT takes only DynamicCoordinates, change this type as well. 
- const ParticleSet& origin_; - - const size_t num_sources_; - const size_t num_targets_; - - ///name of the table - const std::string name_; - - ///operation modes defined by DTModes - DTModes modes_; - -public: - ///constructor using source and target ParticleSet - DistanceTable(const ParticleSet& source, const ParticleSet& target, DTModes modes) - : origin_(source), - num_sources_(source.getTotalNum()), - num_targets_(target.getTotalNum()), - name_(source.getName() + "_" + target.getName()), - modes_(modes) - {} - - /// copy constructor. deleted - DistanceTable(const DistanceTable&) = delete; - - ///virutal destructor - virtual ~DistanceTable() = default; - - ///get modes - inline DTModes getModes() const { return modes_; } - - ///set modes - inline void setModes(DTModes modes) { modes_ = modes; } - - ///return the name of table - inline const std::string& getName() const { return name_; } - - ///returns the reference the origin particleset - const ParticleSet& get_origin() const { return origin_; } - - ///returns the number of centers - inline size_t centers() const { return origin_.getTotalNum(); } - - ///returns the number of centers - inline size_t targets() const { return num_targets_; } - - ///returns the number of source particles - inline size_t sources() const { return num_sources_; } - - /** evaluate the full Distance Table - * @param P the target particle set - */ - virtual void evaluate(ParticleSet& P) = 0; - virtual void mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].evaluate(p_list[iw]); - } - - /** recompute multi walker internal data, recompute - * @param dt_list the distance table batch - * @param p_list the target particle set batch - * @param recompute if true, must recompute. Otherwise, implementation dependent. 
- */ - virtual void mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& recompute) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - if (recompute[iw]) - dt_list[iw].evaluate(p_list[iw]); - } - - /** evaluate the temporary pair relations when a move is proposed - * @param P the target particle set - * @param rnew proposed new position - * @param iat the particle to be moved - * @param prepare_old if true, prepare (temporary) old distances and displacements for using getOldDists and getOldDispls functions in acceptMove. - * - * Note: some distance table consumers (WaveFunctionComponent) have optimized code paths which require prepare_old = true for accepting a move. - * Drivers/Hamiltonians know whether moves will be accepted or not and manage this flag when calling ParticleSet::makeMoveXXX functions. - */ - virtual void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old = true) = 0; - - /** walker batched version of move. this function may be implemented asynchronously. - * Additional synchroniziation for collecting results should be handled by the caller. - * If DTModes::NEED_TEMP_DATA_ON_HOST, host data will be updated. - * If no consumer requests data on the host, the transfer is skipped. - */ - virtual void mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& rnew_list, - const IndexType iat, - bool prepare_old = true) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); - } - - /** update the distance table by the pair relations from the temporal position. - * Used when a move is accepted in regular mode - * @param iat the particle with an accepted move - */ - virtual void update(IndexType jat) = 0; - - /** fill partially the distance table by the pair relations from the temporary or old particle position. 
- * Used in forward mode when a move is reject - * @param iat the particle with an accepted move - * @param from_temp if true, copy from temp. if false, copy from old - */ - virtual void updatePartial(IndexType jat, bool from_temp) - { - if (from_temp) - update(jat); - } - - /** walker batched version of updatePartial. - * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and host distance table will not be updated. - */ - virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, - const std::vector& from_temp) - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - /** finalize distance table calculation after particle-by-particle moves - * if update() doesn't make the table up-to-date during p-by-p moves - * finalizePbyP takes action to bring the table up-to-date - */ - virtual void finalizePbyP(const ParticleSet& P) {} - - /** walker batched version of finalizePbyP - * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not updated at all during p-by-p - * Thus, a recompute is necessary to update the whole host distance table for consumers like the Coulomb potential. - */ - virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const - { - for (int iw = 0; iw < dt_list.size(); iw++) - dt_list[iw].finalizePbyP(p_list[iw]); - } - - /** find the first nearest neighbor - * @param iat source particle id - * @param r distance - * @param dr displacement - * @param newpos if true, use the data in temp_r_ and temp_dr_ for the proposed move. 
- * if false, use the data in distance_[iat] and displacements_[iat] - * @return the id of the nearest particle, -1 not found - */ - virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; - - [[noreturn]] inline void print(std::ostream& os) - { - throw std::runtime_error("DistanceTable::print is not supported"); - } - - /// initialize a shared resource and hand it to a collection - virtual void createResource(ResourceCollection& collection) const {} - - /// acquire a shared resource from a collection - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const - {} - - /// return a shared resource to a collection - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const - {} -}; - -/** AA type of DistanceTable containing storage */ -class DistanceTableAA : public DistanceTable -{ -protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| - * Note: Derived classes decide if it is a memory view or the actual storage - * For only the lower triangle (j=i terms as the nature of operator[]. - * When the storage of the table is allocated as a single memory segment, - * out-of-bound access is still within the segment and - * thus doesn't trigger an alarm by the address sanitizer. 
- */ - std::vector distances_; - - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] - * Note: Derived classes decide if it is a memory view or the actual storage - * only the lower triangle (j displacements_; - - /// temp_r - DistRow temp_r_; - - /// temp_dr - DisplRow temp_dr_; - - /// old distances - DistRow old_r_; - - /// old displacements - DisplRow old_dr_; - -public: - ///constructor using source and target ParticleSet - DistanceTableAA(const ParticleSet& target, DTModes modes) : DistanceTable(target, target, modes) {} - - /** return full table distances - */ - const std::vector& getDistances() const { return distances_; } - - /** return full table displacements - */ - const std::vector& getDisplacements() const { return displacements_; } - - /** return a row of distances for a given target particle - */ - const DistRow& getDistRow(int iel) const { return distances_[iel]; } - - /** return a row of displacements for a given target particle - */ - const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - - /** return the temporary distances when a move is proposed - */ - const DistRow& getTempDists() const { return temp_r_; } - - /** return the temporary displacements when a move is proposed - */ - const DisplRow& getTempDispls() const { return temp_dr_; } - - /** return old distances set up by move() for optimized distance table consumers - */ - const DistRow& getOldDists() const { return old_r_; } - - /** return old displacements set up by move() for optimized distance table consumers - */ - const DisplRow& getOldDispls() const { return old_dr_; } - - virtual size_t get_num_particls_stored() const { return 0; } - - /// return multi walker temporary pair distance table data pointer - [[noreturn]] virtual const RealType* getMultiWalkerTempDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer for temp not supported"); - } - - virtual const RealType* mw_evalDistsInRange(const 
RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - size_t range_begin, - size_t range_end) const - { - return nullptr; - } -}; - -/** AB type of DistanceTable containing storage */ -class DistanceTableAB : public DistanceTable -{ -protected: - /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| - * Note: Derived classes decide if it is a memory view or the actual storage - */ - std::vector distances_; - - /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] - * Note: Derived classes decide if it is a memory view or the actual storage - */ - std::vector displacements_; - - /// temp_r - DistRow temp_r_; - - /// temp_dr - DisplRow temp_dr_; - -public: - ///constructor using source and target ParticleSet - DistanceTableAB(const ParticleSet& source, const ParticleSet& target, DTModes modes) - : DistanceTable(source, target, modes) - {} - - /** return full table distances - */ - const std::vector& getDistances() const { return distances_; } - - /** return full table displacements - */ - const std::vector& getDisplacements() const { return displacements_; } - - /** return a row of distances for a given target particle - */ - const DistRow& getDistRow(int iel) const { return distances_[iel]; } - - /** return a row of displacements for a given target particle - */ - const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } - - /** return the temporary distances when a move is proposed - */ - const DistRow& getTempDists() const { return temp_r_; } - - /** return the temporary displacements when a move is proposed - */ - const DisplRow& getTempDispls() const { return temp_dr_; } - - /// return multi-walker full (all pairs) distance table data pointer - [[noreturn]] virtual const RealType* getMultiWalkerDataPtr() const - { - throw std::runtime_error(name_ + " multi walker data pointer not supported"); - } - - /// return stride of per target pctl data. 
full table data = stride * num of target particles - [[noreturn]] virtual size_t getPerTargetPctlStrideSize() const - { - throw std::runtime_error(name_ + " getPerTargetPctlStrideSize not supported"); - } -}; +using DistanceTable = DistanceTableT; +using DistanceTableAA = DistanceTableAAT; +using DistanceTableAB = DistanceTableABT; } // namespace qmcplusplus #endif diff --git a/src/Particle/DistanceTableT.h b/src/Particle/DistanceTableT.h new file mode 100644 index 0000000000..8e9cad3f9a --- /dev/null +++ b/src/Particle/DistanceTableT.h @@ -0,0 +1,402 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H +#define QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H + +#include + +#include "CPU/SIMD/aligned_allocator.hpp" +#include "DTModes.h" +#include "OhmmsPETE/OhmmsMatrix.h" +#include "OhmmsPETE/OhmmsVector.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/ParticleSetT.h" +#include "Particle/ParticleSetTraits.h" + +namespace qmcplusplus +{ +class ResourceCollection; + +/** @ingroup nnlist + * @brief Abstract class to manage operations on pair data between two + * ParticleSets. 
+ * + * Each DistanceTable object is defined by Source and Target of ParticleSet + * types. This base class doesn't contain storage. It is intended for + * update/compute invoked by ParticleSet. Derived AA/AB classes handle the + * actual storage and data access. + */ +template +class DistanceTableT +{ +public: + static constexpr unsigned DIM = OHMMS_DIM; + + using IndexType = typename ParticleSetTraits::IndexType; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using DistRow = Vector>; + using DisplRow = VectorSoaContainer; + +protected: + // FIXME. once DT takes only DynamicCoordinates, change this type as well. + const ParticleSetT& origin_; + + const size_t num_sources_; + const size_t num_targets_; + + /// name of the table + const std::string name_; + + /// operation modes defined by DTModes + DTModes modes_; + +public: + /// constructor using source and target ParticleSet + DistanceTableT(const ParticleSetT& source, const ParticleSetT& target, DTModes modes) + : origin_(source), + num_sources_(source.getTotalNum()), + num_targets_(target.getTotalNum()), + name_(source.getName() + "_" + target.getName()), + modes_(modes) + {} + + /// copy constructor. 
deleted + DistanceTableT(const DistanceTableT&) = delete; + + /// virtual destructor + virtual ~DistanceTableT() = default; + + /// get modes + inline DTModes getModes() const { return modes_; } + + /// set modes + inline void setModes(DTModes modes) { modes_ = modes; } + + /// return the name of table + inline const std::string& getName() const { return name_; } + + /// returns a reference to the origin particle set + const ParticleSetT& get_origin() const { return origin_; } + + /// returns the number of centers + inline size_t centers() const { return origin_.getTotalNum(); } + + /// returns the number of target particles + inline size_t targets() const { return num_targets_; } + + /// returns the number of source particles + inline size_t sources() const { return num_sources_; } + + /** evaluate the full Distance Table + * @param P the target particle set + */ + virtual void evaluate(ParticleSetT& P) = 0; + virtual void mw_evaluate(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].evaluate(p_list[iw]); + } + + /** recompute multi walker internal data, recompute + * @param dt_list the distance table batch + * @param p_list the target particle set batch + * @param recompute if true, must recompute. Otherwise, implementation + * dependent. + */ + virtual void mw_recompute(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& recompute) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + if (recompute[iw]) + dt_list[iw].evaluate(p_list[iw]); + } + + /** evaluate the temporary pair relations when a move is proposed + * @param P the target particle set + * @param rnew proposed new position + * @param iat the particle to be moved + * @param prepare_old if true, prepare (temporary) old distances and + * displacements for using getOldDists and getOldDispls functions in + * acceptMove.
+ * + * Note: some distance table consumers (WaveFunctionComponent) have + * optimized code paths which require prepare_old = true for accepting a + * move. Drivers/Hamiltonians know whether moves will be accepted or not and + * manage this flag when calling ParticleSet::makeMoveXXX functions. + */ + virtual void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old = true) = 0; + + /** walker batched version of move. this function may be implemented + * asynchronously. Additional synchronization for collecting results should + * be handled by the caller. If DTModes::NEED_TEMP_DATA_ON_HOST, host data + * will be updated. If no consumer requests data on the host, the transfer + * is skipped. + */ + virtual void mw_move(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& rnew_list, + const IndexType iat, + bool prepare_old = true) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old); + } + + /** update the distance table by the pair relations from the temporary + * position. Used when a move is accepted in regular mode + * @param jat the particle with an accepted move + */ + virtual void update(IndexType jat) = 0; + + /** fill partially the distance table by the pair relations from the + * temporary or old particle position. Used in forward mode when a move is + * rejected + * @param jat the particle with an accepted move + * @param from_temp if true, copy from temp. if false, copy from old + */ + virtual void updatePartial(IndexType jat, bool from_temp) + { + if (from_temp) + update(jat); + } + + /** walker batched version of updatePartial. + * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and + * host distance table will not be updated.
+ */ + virtual void mw_updatePartial(const RefVectorWithLeader& dt_list, + IndexType jat, + const std::vector& from_temp) + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].updatePartial(jat, from_temp[iw]); + } + + /** finalize distance table calculation after particle-by-particle moves + * if update() doesn't make the table up-to-date during p-by-p moves + * finalizePbyP takes action to bring the table up-to-date + */ + virtual void finalizePbyP(const ParticleSetT& P) {} + + /** walker batched version of finalizePbyP + * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not + * updated at all during p-by-p. Thus, a recompute is necessary to update the + * whole host distance table for consumers like the Coulomb potential. + */ + virtual void mw_finalizePbyP(const RefVectorWithLeader& dt_list, + const RefVectorWithLeader>& p_list) const + { + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].finalizePbyP(p_list[iw]); + } + + /** find the first nearest neighbor + * @param iat source particle id + * @param r distance + * @param dr displacement + * @param newpos if true, use the data in temp_r_ and temp_dr_ for the + * proposed move.
if false, use the data in distance_[iat] and + * displacements_[iat] + * @return the id of the nearest particle, -1 not found + */ + virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0; + + [[noreturn]] inline void print(std::ostream& os) + { + throw std::runtime_error("DistanceTable::print is not supported"); + } + + /// initialize a shared resource and hand it to a collection + virtual void createResource(ResourceCollection& collection) const {} + + /// acquire a shared resource from a collection + virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const + {} + + /// return a shared resource to a collection + virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const + {} +}; + +/** AA type of DistanceTable containing storage */ +template +class DistanceTableAAT : public DistanceTableT +{ +public: + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; + +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual + * storage For only the lower triangle (j=i terms as the nature of + * operator[]. When the storage of the table is allocated as a single memory + * segment, out-of-bound access is still within the segment and thus doesn't + * trigger an alarm by the address sanitizer. 
+ */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + * r_A1[i] Note: Derived classes decide if it is a memory view or the actual + * storage only the lower triangle (j displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + + /// old distances + DistRow old_r_; + + /// old displacements + DisplRow old_dr_; + +public: + /// constructor using source and target ParticleSet + DistanceTableAAT(const ParticleSetT& target, DTModes modes) : DistanceTableT(target, target, modes) {} + + /** return full table distances + */ + const std::vector& getDistances() const { return distances_; } + + /** return full table displacements + */ + const std::vector& getDisplacements() const { return displacements_; } + + /** return a row of distances for a given target particle + */ + const DistRow& getDistRow(int iel) const { return distances_[iel]; } + + /** return a row of displacements for a given target particle + */ + const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } + + /** return the temporary distances when a move is proposed + */ + const DistRow& getTempDists() const { return temp_r_; } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& getTempDispls() const { return temp_dr_; } + + /** return old distances set up by move() for optimized distance table + * consumers + */ + const DistRow& getOldDists() const { return old_r_; } + + /** return old displacements set up by move() for optimized distance table + * consumers + */ + const DisplRow& getOldDispls() const { return old_dr_; } + + virtual size_t get_num_particls_stored() const { return 0; } + + /// return multi walker temporary pair distance table data pointer + [[noreturn]] virtual const RealType* getMultiWalkerTempDataPtr() const + { + throw std::runtime_error(this->name_ + " multi walker data pointer for temp not supported"); + } + + virtual const RealType* 
mw_evalDistsInRange(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + size_t range_begin, + size_t range_end) const + { + return nullptr; + } +}; + +/** AB type of DistanceTable containing storage */ +template +class DistanceTableABT : public DistanceTableT +{ +public: + using DistRow = typename DistanceTableT::DistRow; + using DisplRow = typename DistanceTableT::DisplRow; + using RealType = typename DistanceTableT::RealType; + +protected: + /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| + * Note: Derived classes decide if it is a memory view or the actual + * storage + */ + std::vector distances_; + + /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - + * r_A1[i] Note: Derived classes decide if it is a memory view or the actual + * storage + */ + std::vector displacements_; + + /// temp_r + DistRow temp_r_; + + /// temp_dr + DisplRow temp_dr_; + +public: + /// constructor using source and target ParticleSet + DistanceTableABT(const ParticleSetT& source, const ParticleSetT& target, DTModes modes) + : DistanceTableT(source, target, modes) + {} + + /** return full table distances + */ + const std::vector& getDistances() const { return distances_; } + + /** return full table displacements + */ + const std::vector& getDisplacements() const { return displacements_; } + + /** return a row of distances for a given target particle + */ + const DistRow& getDistRow(int iel) const { return distances_[iel]; } + + /** return a row of displacements for a given target particle + */ + const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; } + + /** return the temporary distances when a move is proposed + */ + const DistRow& getTempDists() const { return temp_r_; } + + /** return the temporary displacements when a move is proposed + */ + const DisplRow& getTempDispls() const { return temp_dr_; } + + /// return multi-walker full (all pairs) distance table data pointer + [[noreturn]] 
virtual const RealType* getMultiWalkerDataPtr() const + { + throw std::runtime_error(this->name_ + " multi walker data pointer not supported"); + } + + /// return stride of per target pctl data. full table data = stride * num of + /// target particles + [[noreturn]] virtual size_t getPerTargetPctlStrideSize() const + { + throw std::runtime_error(this->name_ + " getPerTargetPctlStrideSize not supported"); + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/DynamicCoordinates.h b/src/Particle/DynamicCoordinates.h index 3b53c4a4c6..0cadfddb86 100644 --- a/src/Particle/DynamicCoordinates.h +++ b/src/Particle/DynamicCoordinates.h @@ -15,102 +15,11 @@ #ifndef QMCPLUSPLUS_DYNAMICCOORDINATES_H #define QMCPLUSPLUS_DYNAMICCOORDINATES_H -#include #include "Configuration.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "type_traits/template_types.hpp" +#include "Particle/DynamicCoordinatesT.h" namespace qmcplusplus { -class ResourceCollection; - -/** enumerator for DynamicCoordinates kinds - */ -enum class DynamicCoordinateKind -{ - DC_POS, // SoA positions - DC_POS_OFFLOAD, // SoA positions with OpenMP offload -}; - -/** quantum variables of all the particles - */ -class DynamicCoordinates -{ -public: - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - using ParticlePos = PtclOnLatticeTraits::ParticlePos; - using PosVectorSoa = VectorSoaContainer; - - DynamicCoordinates(const DynamicCoordinateKind kind_in) : variable_kind_(kind_in) {} - - DynamicCoordinates(const DynamicCoordinates&) = default; - DynamicCoordinates& operator=(const DynamicCoordinates&) = delete; - - DynamicCoordinateKind getKind() const { return variable_kind_; } - - virtual ~DynamicCoordinates() = default; - - virtual std::unique_ptr makeClone() = 0; - - /** resize internal storages based on the number of particles - * @param n the number of particles - */ - virtual void resize(size_t n) = 0; - /// return the number of particles - virtual size_t size() 
const = 0; - - /// overwrite the positions of all the particles. - virtual void setAllParticlePos(const ParticlePos& R) = 0; - /// overwrite the position of one the particle. - virtual void setOneParticlePos(const PosType& pos, size_t iat) = 0; - /** copy the active positions of particles with a uniform id in all the walkers to a single internal buffer. - * @param coords_list a batch of DynamicCoordinates - * @param iat paricle id, uniform across coords_list - * @param new_positions proposed positions - */ - virtual void mw_copyActivePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions) const - { - assert(this == &coords_list.getLeader()); - } - - /** overwrite the positions of particles with a uniform id in all the walkers upon acceptance. - * @param coords_list a batch of DynamicCoordinates - * @param iat paricle id, uniform across coords_list - * @param new_positions proposed positions - * @param isAccepted accept/reject info - */ - virtual void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const = 0; - - /// all particle position accessor - virtual const PosVectorSoa& getAllParticlePos() const = 0; - /// one particle position accessor - virtual PosType getOneParticlePos(size_t iat) const = 0; - - /// secure internal data consistency after p-by-p moves - virtual void donePbyP() {} - - /// initialize a shared resource and hand it to a collection - virtual void createResource(ResourceCollection& collection) const {} - - /// acquire a shared resource from a collection - virtual void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - {} - - /// return a shared resource to a collection - virtual void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const - {} - -protected: - /// type of dynamic coordinates - const DynamicCoordinateKind 
variable_kind_; -}; +using DynamicCoordinates = DynamicCoordinatesT; } // namespace qmcplusplus #endif diff --git a/src/Particle/DynamicCoordinatesT.cpp b/src/Particle/DynamicCoordinatesT.cpp new file mode 100644 index 0000000000..849a5d7d73 --- /dev/null +++ b/src/Particle/DynamicCoordinatesT.cpp @@ -0,0 +1,39 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/DynamicCoordinatesT.h" + +#include "Particle/RealSpacePositionsT.h" +#include "Particle/RealSpacePositionsTOMPTarget.h" + +namespace qmcplusplus +{ +/** create DynamicCoordinates based on kind + */ +template +std::unique_ptr> createDynamicCoordinatesT(const DynamicCoordinateKind kind) +{ + if (kind == DynamicCoordinateKind::DC_POS) + return std::make_unique>(); + else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD) + return std::make_unique>(); + // dummy return + return std::unique_ptr>(); +} + +template std::unique_ptr> createDynamicCoordinatesT( + const DynamicCoordinateKind kind); +template std::unique_ptr> createDynamicCoordinatesT(const DynamicCoordinateKind kind); +template std::unique_ptr>> createDynamicCoordinatesT>( + const DynamicCoordinateKind kind); +template std::unique_ptr>> createDynamicCoordinatesT>( + const DynamicCoordinateKind kind); +} // namespace qmcplusplus diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h new file mode 100644 index 0000000000..a8bf36ad21 --- /dev/null +++ b/src/Particle/DynamicCoordinatesT.h @@ -0,0 +1,123 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DYNAMICCOORDINATEST_H +#define QMCPLUSPLUS_DYNAMICCOORDINATEST_H + +#include + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "ParticleSetTraits.h" +#include "type_traits/template_types.hpp" + +namespace qmcplusplus +{ +class ResourceCollection; + +/** enumerator for DynamicCoordinates kinds + */ +enum class DynamicCoordinateKind +{ + DC_POS, // SoA positions + DC_POS_OFFLOAD, // SoA positions with OpenMP offload +}; + +/** quantum variables of all the particles + */ +template +class DynamicCoordinatesT +{ +public: + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using PosVectorSoa = VectorSoaContainer::DIM>; + + DynamicCoordinatesT(const DynamicCoordinateKind kind_in) : variable_kind_(kind_in) {} + + DynamicCoordinatesT(const DynamicCoordinatesT&) = default; + DynamicCoordinatesT& operator=(const DynamicCoordinatesT&) = delete; + + DynamicCoordinateKind getKind() const { return variable_kind_; } + + virtual ~DynamicCoordinatesT() = default; + + virtual std::unique_ptr makeClone() = 0; + + /** resize internal storages based on the number of particles + * @param n the number of particles + */ + virtual void resize(size_t n) = 0; + /// return the number of particles + virtual size_t size() const = 0; + + /// overwrite the positions of all the particles. 
+ virtual void setAllParticlePos(const ParticlePos& R) = 0; + /// overwrite the position of one particle. + virtual void setOneParticlePos(const PosType& pos, size_t iat) = 0; + /** copy the active positions of particles with a uniform id in all the + * walkers to a single internal buffer. + * @param coords_list a batch of DynamicCoordinates + * @param iat particle id, uniform across coords_list + * @param new_positions proposed positions + */ + virtual void mw_copyActivePos(const RefVectorWithLeader& coords_list, + size_t iat, + const std::vector& new_positions) const + { + assert(this == &coords_list.getLeader()); + } + + /** overwrite the positions of particles with a uniform id in all the + * walkers upon acceptance. + * @param coords_list a batch of DynamicCoordinates + * @param iat particle id, uniform across coords_list + * @param new_positions proposed positions + * @param isAccepted accept/reject info + */ + virtual void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, + size_t iat, + const std::vector& new_positions, + const std::vector& isAccepted) const = 0; + + /// all particle position accessor + virtual const PosVectorSoa& getAllParticlePos() const = 0; + /// one particle position accessor + virtual PosType getOneParticlePos(size_t iat) const = 0; + + /// secure internal data consistency after p-by-p moves + virtual void donePbyP() {} + + /// initialize a shared resource and hand it to a collection + virtual void createResource(ResourceCollection& collection) const {} + + /// acquire a shared resource from a collection + virtual void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + {} + + /// return a shared resource to a collection + virtual void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& coords_list) const + {} + +protected: + /// type of dynamic coordinates + const DynamicCoordinateKind variable_kind_; +}; + +/** create DynamicCoordinates based on kind 
+ */ +template +std::unique_ptr> createDynamicCoordinatesT( + const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); +} // namespace qmcplusplus +#endif diff --git a/src/Particle/InitMolecularSystem.h b/src/Particle/InitMolecularSystem.h index 41f56d8f77..fddfc70916 100644 --- a/src/Particle/InitMolecularSystem.h +++ b/src/Particle/InitMolecularSystem.h @@ -17,49 +17,10 @@ #ifndef QMCPLUSPLUS_INITMOLECULARSYSTEM_H #define QMCPLUSPLUS_INITMOLECULARSYSTEM_H -#include "OhmmsData/OhmmsElementBase.h" -#include +#include "Particle/InitMolecularSystemT.h" namespace qmcplusplus { -class ParticleSet; -class ParticleSetPool; - -/* Engine to initialize the initial electronic structure for a molecular system - */ -class InitMolecularSystem : public OhmmsElementBase -{ -public: - InitMolecularSystem(ParticleSetPool& pset, const char* aname = "mosystem"); - - bool get(std::ostream& os) const override; - bool put(std::istream& is) override; - bool put(xmlNodePtr cur) override; - void reset() override; - - /** initialize els for an atom - */ - void initAtom(ParticleSet* ions, ParticleSet* els); - /** initialize els position for a molecule - * - * Use the valence of each ionic species on a sphere - */ - void initMolecule(ParticleSet* ions, ParticleSet* els); - /** initialize els for the systems with a mixed boundary - * - * Use the bound of the ionic systems and uniform random positions within a reduced box - */ - void initWithVolume(ParticleSet* ions, ParticleSet* els); - -private: - /** pointer to ParticleSetPool - * - * QMCHamiltonian needs to know which ParticleSet object - * is used as an input object for the evaluations. - * Any number of ParticleSet can be used to describe - * a QMCHamiltonian. 
- */ - ParticleSetPool& ptclPool; -}; +using InitMolecularSystem = InitMolecularSystemT; } // namespace qmcplusplus #endif diff --git a/src/Particle/InitMolecularSystem.cpp b/src/Particle/InitMolecularSystemT.cpp similarity index 70% rename from src/Particle/InitMolecularSystem.cpp rename to src/Particle/InitMolecularSystemT.cpp index 9639bdbebf..e13c549ce3 100644 --- a/src/Particle/InitMolecularSystem.cpp +++ b/src/Particle/InitMolecularSystemT.cpp @@ -15,25 +15,22 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "InitMolecularSystemT.h" -/**@file InitMolecularSystem.cpp - * @brief Implements InitMolecuarSystem operators. - */ -#include "InitMolecularSystem.h" -#include "Particle/ParticleSetPool.h" #include "OhmmsData/AttributeSet.h" -#include "Particle/DistanceTable.h" +#include "Particle/DistanceTableT.h" +#include "Particle/ParticleSetPoolT.h" #include "ParticleBase/RandomSeqGeneratorGlobal.h" namespace qmcplusplus { -using RealType = QMCTraits::RealType; - -InitMolecularSystem::InitMolecularSystem(ParticleSetPool& pset, const char* aname) +template +InitMolecularSystemT::InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname) : OhmmsElementBase(aname), ptclPool(pset) {} -bool InitMolecularSystem::put(xmlNodePtr cur) +template +bool InitMolecularSystemT::put(xmlNodePtr cur) { std::string target("e"), source("i"), volume("no"); OhmmsAttributeSet hAttrib; @@ -41,13 +38,13 @@ bool InitMolecularSystem::put(xmlNodePtr cur) hAttrib.add(source, "source"); hAttrib.add(volume, "use_volume"); hAttrib.put(cur); - ParticleSet* els = ptclPool.getParticleSet(target); + ParticleSetT* els = ptclPool.getParticleSet(target); if (els == 0) { ERRORMSG("No target particle " << target << " exists.") return false; } - ParticleSet* ions = ptclPool.getParticleSet(source); + ParticleSetT* ions = ptclPool.getParticleSet(source); 
if (ions == 0) { ERRORMSG("No source particle " << source << " exists.") @@ -70,10 +67,11 @@ bool InitMolecularSystem::put(xmlNodePtr cur) return true; } -void InitMolecularSystem::initAtom(ParticleSet* ions, ParticleSet* els) +template +void InitMolecularSystemT::initAtom(ParticleSetT* ions, ParticleSetT* els) { - //3N-dimensional Gaussian - ParticleSet::ParticlePos chi(els->getTotalNum()); + // 3N-dimensional Gaussian + typename ParticleSetT::ParticlePos chi(els->getTotalNum()); makeGaussRandom(chi); RealType q = std::sqrt(static_cast(els->getTotalNum())) * 0.5; int nel(els->getTotalNum()), items(0); @@ -85,47 +83,52 @@ void InitMolecularSystem::initAtom(ParticleSet* ions, ParticleSet* els) } } -struct LoneElectron +template +struct LoneElectronT { + using RealType = TReal; int ID; RealType BondLength; - inline LoneElectron(int id, RealType bl) : ID(id), BondLength(bl) {} + inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl) {} }; -void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) +template +void InitMolecularSystemT::initMolecule(ParticleSetT* ions, ParticleSetT* els) { if (ions->getTotalNum() == 1) return initAtom(ions, els); const int d_ii_ID = ions->addTable(*ions); ions->update(); - const ParticleSet::ParticleIndex& grID(ions->GroupID); + const typename ParticleSetT::ParticleIndex& grID(ions->GroupID); SpeciesSet& Species(ions->getSpeciesSet()); int Centers = ions->getTotalNum(); std::vector Qtot(Centers), Qcore(Centers), Qval(Centers, 0); - //use charge as the core electrons first + // use charge as the core electrons first int icharge = Species.addAttribute("charge"); - //Assign default core charge + // Assign default core charge for (int iat = 0; iat < Centers; iat++) Qtot[iat] = static_cast(Species(icharge, grID[iat])); - //cutoff radius (Bohr) this a random choice + // cutoff radius (Bohr) this a random choice RealType cutoff = 4.0; - ParticleSet::ParticlePos chi(els->getTotalNum()); - //makeGaussRandom(chi); + 
typename ParticleSetT::ParticlePos chi(els->getTotalNum()); + // makeGaussRandom(chi); makeSphereRandom(chi); // the upper limit of the electron index with spin up const int numUp = els->last(0); - // the upper limit of the electron index with spin down. Pay attention to the no spin down electron case. + // the upper limit of the electron index with spin down. Pay attention to + // the no spin down electron case. const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0); // consumer counter of random numbers chi int random_number_counter = 0; int nup_tot = 0, ndown_tot = numUp; - std::vector loneQ; + std::vector> loneQ; RealType rmin = cutoff; - ParticleSet::SingleParticlePos cm; + typename ParticleSetT::SingleParticlePos cm; const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances(); - // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, put Q[iat]-1 and save the lone electron. + // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd, + // put Q[iat]-1 and save the lone electron. for (size_t iat = 0; iat < Centers; iat++) { cm += ions->R[iat]; @@ -133,12 +136,12 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) { rmin = std::min(rmin, dist[jat][iat]); } - //use 40% of the minimum bond + // use 40% of the minimum bond RealType sep = rmin * 0.4; int v2 = Qtot[iat] / 2; if (Qtot[iat] > v2 * 2) { - loneQ.push_back(LoneElectron(iat, sep)); + loneQ.push_back(LoneElectronT(iat, sep)); } for (int k = 0; k < v2; k++) { @@ -155,8 +158,8 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) // imbalances in molecules at large distances. 
// Not guaranteed to work, but should help in most cases // as long as atoms in molecules are defined sequencially - std::vector::iterator it(loneQ.begin()); - std::vector::iterator it_end(loneQ.end()); + typename std::vector>::iterator it(loneQ.begin()); + typename std::vector>::iterator it_end(loneQ.end()); while (it != it_end && nup_tot != numUp && ndown_tot != numDown) { if (nup_tot < numUp) @@ -172,7 +175,7 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) } // Step 3. Handle more than neutral electrons - //extra electrons around the geometric center + // extra electrons around the geometric center RealType cnorm = 1.0 / static_cast(Centers); RealType sep = rmin * 2; cm = cnorm * cm; @@ -183,11 +186,13 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) while (ndown_tot < numDown) els->R[ndown_tot++] = cm + sep * chi[random_number_counter++]; - // safety check. all the random numbers should have been consumed once and only once. + // safety check. all the random numbers should have been consumed once and + // only once. if (random_number_counter != chi.size()) - throw std::runtime_error("initMolecule unexpected random number consumption. Please report a bug!"); + throw std::runtime_error("initMolecule unexpected random number " + "consumption. 
Please report a bug!"); - //put all the electrons in a unit box + // put all the electrons in a unit box if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) { els->R.setUnit(PosUnit::Cartesian); @@ -196,26 +201,27 @@ void InitMolecularSystem::initMolecule(ParticleSet* ions, ParticleSet* els) } } -///helper function to determine the lower bound of a domain (need to move up) +/// helper function to determine the lower bound of a domain (need to move up) template inline TinyVector lower_bound(const TinyVector& a, const TinyVector& b) { return TinyVector(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2])); } -///helper function to determine the upper bound of a domain (need to move up) +/// helper function to determine the upper bound of a domain (need to move up) template inline TinyVector upper_bound(const TinyVector& a, const TinyVector& b) { return TinyVector(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2])); } -void InitMolecularSystem::initWithVolume(ParticleSet* ions, ParticleSet* els) +template +void InitMolecularSystemT::initWithVolume(ParticleSetT* ions, ParticleSetT* els) { TinyVector start(1.0); TinyVector end(0.0); - ParticleSet::ParticlePos Ru(ions->getTotalNum()); + typename ParticleSetT::ParticlePos Ru(ions->getTotalNum()); Ru.setUnit(PosUnit::Lattice); ions->applyBC(ions->R, Ru); @@ -228,23 +234,23 @@ void InitMolecularSystem::initWithVolume(ParticleSet* ions, ParticleSet* els) TinyVector shift; Tensor newbox(ions->getLattice().R); - RealType buffer = 2.0; //buffer 2 bohr + RealType buffer = 2.0; // buffer 2 bohr for (int idim = 0; idim < OHMMS_DIM; ++idim) { - //if(ions->getLattice().BoxBConds[idim]) + // if(ions->getLattice().BoxBConds[idim]) //{ - // start[idim]=0.0; - // end[idim]=1.0; - // shift[idim]=0.0; - //} - //else + // start[idim]=0.0; + // end[idim]=1.0; + // shift[idim]=0.0; + // } + // else { RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim]; start[idim] = std::max((RealType)0.0, 
(start[idim] - buffer_r)); end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r)); shift[idim] = start[idim] * ions->getLattice().Length[idim]; if (std::abs(end[idim] = start[idim]) < buffer) - { //handle singular case + { // handle singular case start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0); end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0); } @@ -253,7 +259,7 @@ void InitMolecularSystem::initWithVolume(ParticleSet* ions, ParticleSet* els) } } - ParticleSet::ParticleLayout slattice(ions->getLattice()); + typename ParticleSetT::ParticleLayout slattice(ions->getLattice()); slattice.set(newbox); app_log() << " InitMolecularSystem::initWithVolume " << std::endl; @@ -267,9 +273,34 @@ void InitMolecularSystem::initWithVolume(ParticleSet* ions, ParticleSet* els) els->R.setUnit(PosUnit::Cartesian); } -bool InitMolecularSystem::put(std::istream& is) { return true; } +template +bool InitMolecularSystemT::put(std::istream& is) +{ + return true; +} + +template +bool InitMolecularSystemT::get(std::ostream& os) const +{ + return true; +} + +template +void InitMolecularSystemT::reset() +{} -bool InitMolecularSystem::get(std::ostream& os) const { return true; } +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class InitMolecularSystemT; +#else +template class InitMolecularSystemT; +#endif +#else +#ifndef MIXED_PRECISION +template class InitMolecularSystemT>; +#else +template class InitMolecularSystemT>; +#endif +#endif -void InitMolecularSystem::reset() {} } // namespace qmcplusplus diff --git a/src/Particle/InitMolecularSystemT.h b/src/Particle/InitMolecularSystemT.h new file mode 100644 index 0000000000..e85914a8c1 --- /dev/null +++ b/src/Particle/InitMolecularSystemT.h @@ -0,0 +1,69 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. 
+// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_INITMOLECULARSYSTEMT_H +#define QMCPLUSPLUS_INITMOLECULARSYSTEMT_H + +#include "OhmmsData/OhmmsElementBase.h" +#include "ParticleSetTraits.h" + +#include + +namespace qmcplusplus +{ +template +class ParticleSetT; +template +class ParticleSetPoolT; + +/* Engine to initialize the initial electronic structure for a molecular system + */ +template +class InitMolecularSystemT : public OhmmsElementBase +{ +public: + using RealType = typename ParticleSetTraits::RealType; + + InitMolecularSystemT(ParticleSetPoolT& pset, const char* aname = "mosystem"); + + bool get(std::ostream& os) const override; + bool put(std::istream& is) override; + bool put(xmlNodePtr cur) override; + void reset() override; + + /** initialize els for an atom + */ + void initAtom(ParticleSetT* ions, ParticleSetT* els); + /** initialize els position for a molecule + * + * Use the valence of each ionic species on a sphere + */ + void initMolecule(ParticleSetT* ions, ParticleSetT* els); + /** initialize els for the systems with a mixed boundary + * + * Use the bound of the ionic systems and uniform random positions within a + * reduced box + */ + void initWithVolume(ParticleSetT* ions, ParticleSetT* els); + +private: + /** pointer to ParticleSetPool + * + * QMCHamiltonian needs to know which ParticleSet object + * is used as an input object for the evaluations. + * Any number of ParticleSet can be used to describe + * a QMCHamiltonian. 
+ */ + ParticleSetPoolT& ptclPool; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/Lattice/CrystalLattice.h b/src/Particle/Lattice/CrystalLattice.h index a8d56236e5..54c341836b 100644 --- a/src/Particle/Lattice/CrystalLattice.h +++ b/src/Particle/Lattice/CrystalLattice.h @@ -56,7 +56,7 @@ struct CrystalLattice : public LRBreakupParameters { /// alias to the base class using Base = LRBreakupParameters; - + static_assert(std::is_floating_point_v); ///enumeration for the dimension of the lattice enum { diff --git a/src/Particle/Lattice/LRBreakupParameters.h b/src/Particle/Lattice/LRBreakupParameters.h index da44f6fc40..4096bf0e42 100644 --- a/src/Particle/Lattice/LRBreakupParameters.h +++ b/src/Particle/Lattice/LRBreakupParameters.h @@ -57,7 +57,7 @@ class LRBreakupParameters T beta2 = (dot(v1, v1) * dot(c, v2) - dot(v1, v2) * dot(c, v1)) / (dot(v1, v1) * dot(v2, v2) - dot(v1, v2) * dot(v1, v2)); TinyVector p = beta1 * v1 + beta2 * v2; - T dist = sqrt(dot(p - c, p - c)); + T dist = std::sqrt(dot(p - c, p - c)); LR_rc = std::min(LR_rc, dist); } //Set KC for structure-factor and LRbreakups. diff --git a/src/Particle/LongRange/KContainer.h b/src/Particle/LongRange/KContainer.h index eee91affc7..c181806107 100644 --- a/src/Particle/LongRange/KContainer.h +++ b/src/Particle/LongRange/KContainer.h @@ -16,83 +16,11 @@ #define QMCPLUSPLUS_KCONTAINER_H #include "Configuration.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "KContainerT.h" namespace qmcplusplus { -/** Container for k-points - * - * It generates a set of k-points that are unit-translations of the reciprocal-space - * cell. K-points are generated within a spherical cutoff set by the supercell - */ -class KContainer : public QMCTraits -{ -private: - /// The cutoff up to which k-vectors are generated. 
- RealType kcutoff; - -public: - //Typedef for the lattice-type - using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; - - ///number of k-points - int numk; - - /** maximum integer translations of reciprocal cell within kc. - * - * Last index is max. of first dimension+1 - */ - TinyVector mmax; - - /** K-vector in reduced coordinates - */ - std::vector> kpts; - /** K-vector in Cartesian coordinates - */ - std::vector kpts_cart; - /** squre of kpts in Cartesian coordniates - */ - std::vector ksq; - /** Given a k index, return index to -k - */ - std::vector minusk; - /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */ - std::vector kshell; - - /** k points sorted by the |k| excluding |k|=0 - * - * The first for |k| - * The second for a map to the full index. The size of the second is the degeneracy. - */ - //std::map*> kpts_sorted; - - /** update k-vectors - * @param sc supercell - * @param kc cutoff radius in the K - * @param twist shifts the center of the grid of k-vectors - * @param useSphere if true, use the |K| - */ - void updateKLists(const ParticleLayout& lattice, - RealType kc, - unsigned ndim, - const PosType& twist = PosType(), - bool useSphere = true); - - const auto& get_kpts_cart_soa() const { return kpts_cart_soa_; } - -private: - /** compute approximate parallelpiped that surrounds kc - * @param lattice supercell - */ - void findApproxMMax(const ParticleLayout& lattice, unsigned ndim); - /** construct the container for k-vectors */ - void BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere); - - /** K-vector in Cartesian coordinates in SoA layout - */ - VectorSoaContainer> kpts_cart_soa_; -}; +using KContainer = KContainerT; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/KContainer.cpp b/src/Particle/LongRange/KContainerT.cpp similarity index 68% rename from src/Particle/LongRange/KContainer.cpp rename to src/Particle/LongRange/KContainerT.cpp index 72d4c8bd17..13eaa470d4 100644 --- 
a/src/Particle/LongRange/KContainer.cpp +++ b/src/Particle/LongRange/KContainerT.cpp @@ -11,21 +11,23 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "KContainerT.h" -#include "KContainer.h" -#include -#include -#include "Message/Communicate.h" #include "LRCoulombSingleton.h" +#include "Message/Communicate.h" #include "Utilities/qmc_common.h" +#include +#include + namespace qmcplusplus { -void KContainer::updateKLists(const ParticleLayout& lattice, - RealType kc, - unsigned ndim, - const PosType& twist, - bool useSphere) +template +void KContainerT::updateKLists(const ParticleLayout& lattice, + RealType kc, + unsigned ndim, + const PosType& twist, + bool useSphere) { kcutoff = kc; if (kcutoff <= 0.0) @@ -41,44 +43,45 @@ void KContainer::updateKLists(const ParticleLayout& lattice, app_log() << std::endl; } -void KContainer::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) +template +void KContainerT::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) { - //Estimate the size of the parallelpiped that encompasses a sphere of kcutoff. - //mmax is stored as integer translations of the reciprocal cell vectors. - //Does not require an orthorhombic cell. + // Estimate the size of the parallelpiped that encompasses a sphere of + // kcutoff. mmax is stored as integer translations of the reciprocal cell + // vectors. Does not require an orthorhombic cell. /* Old method. 
- //2pi is not included in lattice.b - Matrix mmat; - mmat.resize(3,3); - for(int j=0;j<3;j++) - for(int i=0;i<3;i++){ - mmat[i][j] = 0.0; - for(int k=0;k<3;k++) - mmat[i][j] = mmat[i][j] + 4.0*M_PI*M_PI*lattice.b(k)[i]*lattice.b(j)[k]; - } + //2pi is not included in lattice.b + Matrix mmat; + mmat.resize(3,3); + for(int j=0;j<3;j++) + for(int i=0;i<3;i++){ + mmat[i][j] = 0.0; + for(int k=0;k<3;k++) + mmat[i][j] = mmat[i][j] + 4.0*M_PI*M_PI*lattice.b(k)[i]*lattice.b(j)[k]; + } - TinyVector x,temp; - RealType tempr; - for(int idim=0;idim<3;idim++){ - int i = ((idim)%3); - int j = ((idim+1)%3); - int k = ((idim+2)%3); + TinyVector x,temp; + RealType tempr; + for(int idim=0;idim<3;idim++){ + int i = ((idim)%3); + int j = ((idim+1)%3); + int k = ((idim+2)%3); - x[i] = 1.0; - x[j] = (mmat[j][k]*mmat[k][i] - mmat[k][k]*mmat[i][j]); - x[j]/= (mmat[j][j]*mmat[k][k] - mmat[j][k]*mmat[j][k]); - x[k] = -(mmat[k][i] + mmat[j][k]*x[j])/mmat[k][k]; + x[i] = 1.0; + x[j] = (mmat[j][k]*mmat[k][i] - mmat[k][k]*mmat[i][j]); + x[j]/= (mmat[j][j]*mmat[k][k] - mmat[j][k]*mmat[j][k]); + x[k] = -(mmat[k][i] + mmat[j][k]*x[j])/mmat[k][k]; - for(i=0;i<3;i++){ - temp[i] = 0.0; - for(j=0;j<3;j++) - temp[i] += mmat[i][j]*x[j]; - } + for(i=0;i<3;i++){ + temp[i] = 0.0; + for(j=0;j<3;j++) + temp[i] += mmat[i][j]*x[j]; + } - tempr = dot(x,temp); - mmax[idim] = static_cast(sqrt(4.0*kcut2/tempr)) + 1; - } - */ + tempr = dot(x,temp); + mmax[idim] = static_cast(sqrt(4.0*kcut2/tempr)) + 1; + } + */ // see rmm, Electronic Structure, p. 
85 for details for (int i = 0; i < DIM; i++) mmax[i] = static_cast(std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) * kcutoff / (2 * M_PI))) + 1; @@ -87,7 +90,7 @@ void KContainer::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) for (int i = 1; i < DIM; ++i) mmax[DIM] = std::max(mmax[i], mmax[DIM]); - //overwrite the non-periodic directon to be zero + // overwrite the non-periodic directon to be zero if (LRCoulombSingleton::isQuasi2D()) { app_log() << " No kspace sum perpendicular to slab " << std::endl; @@ -102,7 +105,8 @@ void KContainer::findApproxMMax(const ParticleLayout& lattice, unsigned ndim) mmax[1] = 0; } -void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere) +template +void KContainerT::BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere) { TinyVector TempActualMax; TinyVector kvec; @@ -115,7 +119,7 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist if (useSphere) { const RealType kcut2 = kcutoff * kcutoff; - //Loop over guesses for valid k-points. + // Loop over guesses for valid k-points. for (int i = -mmax[0]; i <= mmax[0]; i++) { kvec[0] = i; @@ -125,20 +129,20 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist for (int k = -mmax[2]; k <= mmax[2]; k++) { kvec[2] = k; - //Do not include k=0 in evaluations. + // Do not include k=0 in evaluations. if (i == 0 && j == 0 && k == 0) continue; - //Convert kvec to Cartesian + // Convert kvec to Cartesian kvec_cart = lattice.k_cart(kvec + twist); - //Find modk + // Find modk modk2 = dot(kvec_cart, kvec_cart); if (modk2 > kcut2) - continue; //Inside cutoff? - //This k-point should be added to the list + continue; // Inside cutoff? + // This k-point should be added to the list kpts_tmp.push_back(kvec); kpts_cart_tmp.push_back(kvec_cart); ksq_tmp.push_back(modk2); - //Update record of the allowed maximum translation. 
+ // Update record of the allowed maximum translation. for (int idim = 0; idim < 3; idim++) if (std::abs(kvec[idim]) > TempActualMax[idim]) TempActualMax[idim] = std::abs(kvec[idim]); @@ -148,10 +152,10 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist } else { - // Loop over all k-points in the parallelpiped and add them to kcontainer - // note layout is for interfacing with fft, so for each dimension, the - // positive indexes come first then the negative indexes backwards - // e.g. 0, 1, .... mmax, -mmax+1, -mmax+2, ... -1 + // Loop over all k-points in the parallelpiped and add them to + // kcontainer note layout is for interfacing with fft, so for each + // dimension, the positive indexes come first then the negative indexes + // backwards e.g. 0, 1, .... mmax, -mmax+1, -mmax+2, ... -1 const int idimsize = mmax[0] * 2; const int jdimsize = mmax[1] * 2; const int kdimsize = mmax[2] * 2; @@ -186,14 +190,15 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist TempActualMax[2] = mmax[2]; } - //Update a record of the number of k vectors + // Update a record of the number of k vectors numk = kpts_tmp.size(); std::map*> kpts_sorted; - //create the map: use simple integer with resolution of 0.00000001 in ksq + // create the map: use simple integer with resolution of 0.00000001 in ksq for (int ik = 0; ik < numk; ik++) { - //This is a workaround for ewald bug (Issue #2105). Basically, 1e-7 is the resolution of |k|^2 for doubles, - //so we jack up the tolerance to match that. + // This is a workaround for ewald bug (Issue #2105). Basically, 1e-7 is + // the resolution of |k|^2 for doubles, so we jack up the tolerance to + // match that. 
const int64_t k_ind = static_cast(ksq_tmp[ik] * 10000000); auto it(kpts_sorted.find(k_ind)); if (it == kpts_sorted.end()) @@ -239,18 +244,19 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist delete it->second; it++; } - //Finished searching k-points. Copy list of maximum translations. + // Finished searching k-points. Copy list of maximum translations. mmax[DIM] = 0; for (int idim = 0; idim < DIM; idim++) { mmax[idim] = TempActualMax[idim]; mmax[DIM] = std::max(mmax[idim], mmax[DIM]); - //if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim]; + // if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim]; } - //Now fill the array that returns the index of -k when given the index of k. + // Now fill the array that returns the index of -k when given the index of + // k. minusk.resize(numk); - //Assigns a unique hash value to each kpoint. + // Assigns a unique hash value to each kpoint. auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t { int64_t hash = 0; // this will cause integral promotion below for (int i = 0; i < inpv.Size; ++i) @@ -271,4 +277,8 @@ void KContainer::BuildKLists(const ParticleLayout& lattice, const PosType& twist } } +template class KContainerT; +template class KContainerT; +template class KContainerT>; +template class KContainerT>; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/KContainerT.h b/src/Particle/LongRange/KContainerT.h new file mode 100644 index 0000000000..20b98af347 --- /dev/null +++ b/src/Particle/LongRange/KContainerT.h @@ -0,0 +1,106 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_KCONTAINERT_H +#define QMCPLUSPLUS_KCONTAINERT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "ParticleSetTraits.h" + +namespace qmcplusplus +{ +/** Container for k-points + * + * It generates a set of k-points that are unit-translations of the + * reciprocal-space cell. K-points are generated within a spherical cutoff set + * by the supercell + */ +template +class KContainerT +{ +public: + static constexpr auto DIM = ParticleSetTraits::DIM; + using RealType = typename ParticleSetTraits::RealType; + using PosType = typename ParticleSetTraits::PosType; + +private: + /// The cutoff up to which k-vectors are generated. + RealType kcutoff; + +public: + // Typedef for the lattice-type + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + + /// number of k-points + int numk; + + /** maximum integer translations of reciprocal cell within kc. + * + * Last index is max. of first dimension+1 + */ + TinyVector mmax; + + /** K-vector in reduced coordinates + */ + std::vector> kpts; + /** K-vector in Cartesian coordinates + */ + std::vector kpts_cart; + /** squre of kpts in Cartesian coordniates + */ + std::vector ksq; + /** Given a k index, return index to -k + */ + std::vector minusk; + /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */ + std::vector kshell; + + /** k points sorted by the |k| excluding |k|=0 + * + * The first for |k| + * The second for a map to the full index. 
The size of the second is the + * degeneracy. + */ + // std::map*> kpts_sorted; + + /** update k-vectors + * @param sc supercell + * @param kc cutoff radius in the K + * @param twist shifts the center of the grid of k-vectors + * @param useSphere if true, use the |K| + */ + void updateKLists(const ParticleLayout& lattice, + RealType kc, + unsigned ndim, + const PosType& twist = PosType(), + bool useSphere = true); + + const auto& get_kpts_cart_soa() const { return kpts_cart_soa_; } + +private: + /** compute approximate parallelpiped that surrounds kc + * @param lattice supercell + */ + void findApproxMMax(const ParticleLayout& lattice, unsigned ndim); + /** construct the container for k-vectors */ + void BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere); + + /** K-vector in Cartesian coordinates in SoA layout + */ + VectorSoaContainer> kpts_cart_soa_; +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/LongRange/StructFact.h b/src/Particle/LongRange/StructFact.h index cfa29e9255..79fe0a2e99 100644 --- a/src/Particle/LongRange/StructFact.h +++ b/src/Particle/LongRange/StructFact.h @@ -14,113 +14,12 @@ #ifndef QMCPLUSPLUS_STRUCTFACT_H #define QMCPLUSPLUS_STRUCTFACT_H -#include "OhmmsPETE/OhmmsVector.h" -#include "OhmmsPETE/OhmmsMatrix.h" #include "Configuration.h" -#include -#include -#include -#include +#include "StructFactT.h" namespace qmcplusplus { -class ParticleSet; -class KContainer; -struct SKMultiWalkerMem; - -/** @ingroup longrange - *\brief Calculates the structure-factor for a particle set - * - * Structure factor per species - * Rhok[alpha][k] \f$ \equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf r_i}}\f$ - * Structure factor per particle - * eikr[i][k] - */ -class StructFact : public QMCTraits -{ -public: - //Typedef for the lattice-type - using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; - - /** enumeration for the methods to handle mixed bconds - * - * Allow overwriting 
lattice::SuperCellEnum to use D-dim k-point sets with mixed BC - */ - int SuperCellEnum; - ///2-D container for the phase - Matrix rhok_r, rhok_i; - Matrix eikr_r, eikr_i; - /** Constructor - copy ParticleSet and init. k-shells - * @param lattice long range box - * @param kc cutoff for k - * - * At least in the batched version Structure factor is _NOT_ valid - * after construction. - */ - StructFact(const ParticleLayout& lattice, const KContainer& k_lists); - /// desructor - ~StructFact(); - - /** Update Rhok if all particles moved - */ - void updateAllPart(const ParticleSet& P); - - /** Update RhoK for all particles for multiple walkers particles. - * - * In batched context until this is called StructFact is invalid and will cause a crash if any Hamiltonian using StructFact - * indirectly through ParticleSet is evaluated. - */ - static void mw_updateAllPart(const RefVectorWithLeader& sk_list, - const RefVectorWithLeader& p_list, - SKMultiWalkerMem& mw_mem); - - /** @brief switch on the storage per particle - * if StorePerParticle was false, this function allocates memory and precompute data - * if StorePerParticle was true, this function is no-op - */ - void turnOnStorePerParticle(const ParticleSet& P); - - /// accessor of StorePerParticle - bool isStorePerParticle() const { return StorePerParticle; } - - /// accessor of k_lists_ - const KContainer& getKLists() const { return k_lists_; } - -private: - /// Compute all rhok elements from the start - void computeRhok(const ParticleSet& P); - /** resize the internal data - * @param nkpts - * @param num_species number of species - * @param num_ptcls number of particles - */ - void resize(int nkpts, int num_species, int num_ptcls); - - /// K-Vector List. - const KContainer& k_lists_; - /** Whether intermediate data is stored per particle. default false - * storing data per particle needs significant amount of memory but some calculation may request it. 
- * storing data per particle specie is more cost-effective - */ - bool StorePerParticle; - /// timer for updateAllPart - NewTimer& update_all_timer_; -}; - -///multi walker shared memory buffer -struct SKMultiWalkerMem : public Resource -{ - using RealType = StructFact::RealType; - - ///dist displ for temporary and old pairs - Matrix> nw_rhok; - - SKMultiWalkerMem() : Resource("SKMultiWalkerMem") {} - - SKMultiWalkerMem(const SKMultiWalkerMem&) : SKMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } -}; +using StructFact = StructFactT; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFact.cpp b/src/Particle/LongRange/StructFactT.cpp similarity index 82% rename from src/Particle/LongRange/StructFact.cpp rename to src/Particle/LongRange/StructFactT.cpp index b5f23aab9f..e7868fb24d 100644 --- a/src/Particle/LongRange/StructFact.cpp +++ b/src/Particle/LongRange/StructFactT.cpp @@ -9,24 +9,27 @@ // Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign // Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory // +// // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "StructFactT.h" -#include "StructFact.h" -#include "CPU/math.hpp" -#include "CPU/e2iphi.h" -#include "CPU/SIMD/vmath.hpp" #include "CPU/BLAS.hpp" -#include "Utilities/qmc_common.h" -#include "OMPTarget/OMPTargetMath.hpp" -#include "RealSpacePositionsOMPTarget.h" +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "CPU/math.hpp" #include "LRCoulombSingleton.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "RealSpacePositionsTOMPTarget.h" +#include "Utilities/qmc_common.h" +#include "ParticleSetT.h" namespace qmcplusplus { -//Constructor - pass arguments to k_lists_' constructor -StructFact::StructFact(const ParticleLayout& lattice, const KContainer& k_lists) +// Constructor - pass arguments to k_lists_' constructor +template +StructFactT::StructFactT(const ParticleLayout& lattice, const KContainerT& k_lists) : SuperCellEnum(SUPERCELL_BULK), k_lists_(k_lists), StorePerParticle(false), @@ -39,10 +42,12 @@ StructFact::StructFact(const ParticleLayout& lattice, const KContainer& k_lists) } } -//Destructor -StructFact::~StructFact() = default; +// Destructor +template +StructFactT::~StructFactT() = default; -void StructFact::resize(int nkpts, int num_species, int num_ptcls) +template +void StructFactT::resize(int nkpts, int num_species, int num_ptcls) { rhok_r.resize(num_species, nkpts); rhok_i.resize(num_species, nkpts); @@ -53,16 +58,17 @@ void StructFact::resize(int nkpts, int num_species, int num_ptcls) } } - -void StructFact::updateAllPart(const ParticleSet& P) +template +void StructFactT::updateAllPart(const ParticleSetT& P) { ScopedTimer local(update_all_timer_); computeRhok(P); } -void StructFact::mw_updateAllPart(const RefVectorWithLeader& sk_list, - const RefVectorWithLeader& p_list, - 
SKMultiWalkerMem& mw_mem) +template +void StructFactT::mw_updateAllPart(const RefVectorWithLeader& sk_list, + const RefVectorWithLeader>& p_list, + SKMultiWalkerMemT& mw_mem) { auto& sk_leader = sk_list.getLeader(); auto& p_leader = p_list.getLeader(); @@ -78,7 +84,7 @@ void StructFact::mw_updateAllPart(const RefVectorWithLeader& sk_list const size_t nk = sk_leader.k_lists_.numk; const size_t nk_padded = kpts_cart.capacity(); - auto& coordinates_leader = static_cast(p_leader.getCoordinates()); + auto& coordinates_leader = static_cast&>(p_leader.getCoordinates()); auto& mw_rsoa_dev_ptrs = coordinates_leader.getMultiWalkerRSoADevicePtrs(); const size_t np_padded = p_leader.getCoordinates().getAllParticlePos().capacity(); @@ -94,7 +100,8 @@ void StructFact::mw_updateAllPart(const RefVectorWithLeader& sk_list auto* mw_rhok_ptr = mw_mem.nw_rhok.data(); auto* group_offsets = p_leader.get_group_offsets().data(); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])") + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])") for (int iw = 0; iw < nw; iw++) for (int ib = 0; ib < num_kblocks; ib++) { @@ -132,10 +139,10 @@ void StructFact::mw_updateAllPart(const RefVectorWithLeader& sk_list } } - /** evaluate rok per species, eikr per particle */ -void StructFact::computeRhok(const ParticleSet& P) +template +void StructFactT::computeRhok(const ParticleSetT& P) { const size_t num_ptcls = P.getTotalNum(); const size_t num_species = P.groups(); @@ -204,7 +211,8 @@ void StructFact::computeRhok(const ParticleSet& P) } } -void StructFact::turnOnStorePerParticle(const ParticleSet& P) +template +void StructFactT::turnOnStorePerParticle(const ParticleSetT& P) { if (!StorePerParticle) { @@ -213,4 +221,13 @@ void StructFact::turnOnStorePerParticle(const ParticleSet& P) } } +template class StructFactT; +template class StructFactT; +template class StructFactT>; 
+template class StructFactT>; + +template struct SKMultiWalkerMemT; +template struct SKMultiWalkerMemT; +template struct SKMultiWalkerMemT>; +template struct SKMultiWalkerMemT>; } // namespace qmcplusplus diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h new file mode 100644 index 0000000000..1736311f91 --- /dev/null +++ b/src/Particle/LongRange/StructFactT.h @@ -0,0 +1,135 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_STRUCTFACTT_H +#define QMCPLUSPLUS_STRUCTFACTT_H + +#include "OhmmsPETE/OhmmsMatrix.h" +#include "OhmmsPETE/OhmmsVector.h" +#include "Particle/ParticleSetTraits.h" +#include "KContainer.h" +#include "NewTimer.h" +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Resource.h" +#include "type_traits/template_types.hpp" + +namespace qmcplusplus +{ +template +class ParticleSetT; +template +struct SKMultiWalkerMemT; + +/** @ingroup longrange + *\brief Calculates the structure-factor for a particle set + * + * Structure factor per species + * Rhok[alpha][k] \f$ \equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf + *r_i}}\f$ Structure factor per particle eikr[i][k] + */ +template +class StructFactT +{ +public: + // Typedef for the lattice-type + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + using RealType = typename 
ParticleSetTraits::RealType; + + static constexpr auto DIM = ParticleSetTraits::DIM; + + /** enumeration for the methods to handle mixed bconds + * + * Allow overwriting lattice::SuperCellEnum to use D-dim k-point sets with + * mixed BC + */ + int SuperCellEnum; + /// 2-D container for the phase + Matrix rhok_r, rhok_i; + Matrix eikr_r, eikr_i; + /** Constructor - copy ParticleSet and init. k-shells + * @param lattice long range box + * @param kc cutoff for k + * + * At least in the batched version Structure factor is _NOT_ valid + * after construction. + */ + StructFactT(const ParticleLayout& lattice, const KContainerT& k_lists); + /// desructor + ~StructFactT(); + + /** Update Rhok if all particles moved + */ + void updateAllPart(const ParticleSetT& P); + + /** Update RhoK for all particles for multiple walkers particles. + * + * In batched context until this is called StructFact is invalid and will + * cause a crash if any Hamiltonian using StructFact indirectly through + * ParticleSet is evaluated. + */ + static void mw_updateAllPart(const RefVectorWithLeader& sk_list, + const RefVectorWithLeader>& p_list, + SKMultiWalkerMemT& mw_mem); + + /** @brief switch on the storage per particle + * if StorePerParticle was false, this function allocates memory and + * precompute data if StorePerParticle was true, this function is no-op + */ + void turnOnStorePerParticle(const ParticleSetT& P); + + /// accessor of StorePerParticle + bool isStorePerParticle() const { return StorePerParticle; } + + /// accessor of k_lists_ + const KContainerT& getKLists() const { return k_lists_; } + +private: + /// Compute all rhok elements from the start + void computeRhok(const ParticleSetT& P); + /** resize the internal data + * @param nkpts + * @param num_species number of species + * @param num_ptcls number of particles + */ + void resize(int nkpts, int num_species, int num_ptcls); + + /// K-Vector List. 
+ const KContainerT& k_lists_; + /** Whether intermediate data is stored per particle. default false + * storing data per particle needs significant amount of memory but some + * calculation may request it. storing data per particle specie is more + * cost-effective + */ + bool StorePerParticle; + /// timer for updateAllPart + NewTimer& update_all_timer_; +}; + +/// multi walker shared memory buffer +template +struct SKMultiWalkerMemT : public Resource +{ + using RealType = typename StructFactT::RealType; + + /// dist displ for temporary and old pairs + Matrix> nw_rhok; + + SKMultiWalkerMemT() : Resource("SKMultiWalkerMem") {} + + SKMultiWalkerMemT(const SKMultiWalkerMemT&) : SKMultiWalkerMemT() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/LongRange/tests/test_lrhandler.cpp b/src/Particle/LongRange/tests/test_lrhandler.cpp index f3634028c8..9f0d8a67db 100644 --- a/src/Particle/LongRange/tests/test_lrhandler.cpp +++ b/src/Particle/LongRange/tests/test_lrhandler.cpp @@ -15,6 +15,7 @@ #include "Lattice/CrystalLattice.h" #include "Particle/ParticleSet.h" #include "LongRange/LRHandlerBase.h" +#include "Particle/SimulationCell.h" namespace qmcplusplus { diff --git a/src/Particle/MCCoords.cpp b/src/Particle/MCCoords.cpp deleted file mode 100644 index c2849c71ff..0000000000 --- a/src/Particle/MCCoords.cpp +++ /dev/null @@ -1,51 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2022 QMCPACK developers. 
-// -// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory -////////////////////////////////////////////////////////////////////////////////////// - -#include "MCCoords.hpp" - -namespace qmcplusplus -{ -void MCCoords::getSubset(const std::size_t offset, - const std::size_t size, - MCCoords& out) const -{ - std::copy_n(positions.begin() + offset, size, out.positions.begin()); -} - -MCCoords& MCCoords::operator+=(const MCCoords& rhs) -{ - assert(positions.size() == rhs.positions.size()); - std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), - [](const QMCTraits::PosType& x, const QMCTraits::PosType& y) { return x + y; }); - return *this; -} - -void MCCoords::getSubset(const std::size_t offset, - const std::size_t size, - MCCoords& out) const -{ - std::copy_n(positions.begin() + offset, size, out.positions.begin()); - std::copy_n(spins.begin() + offset, size, out.spins.begin()); -} - -MCCoords& MCCoords::operator+=(const MCCoords& rhs) -{ - assert(positions.size() == rhs.positions.size()); - std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), - [](const QMCTraits::PosType& x, const QMCTraits::PosType& y) { return x + y; }); - std::transform(spins.begin(), spins.end(), rhs.spins.begin(), spins.begin(), - [](const QMCTraits::FullPrecRealType& x, const QMCTraits::FullPrecRealType& y) { return x + y; }); - return *this; -} - -template struct MCCoords; -template struct MCCoords; -} // namespace qmcplusplus diff --git a/src/Particle/MCCoords.hpp b/src/Particle/MCCoords.hpp index 0c623c0888..f6cee15aa5 100644 --- a/src/Particle/MCCoords.hpp +++ b/src/Particle/MCCoords.hpp @@ -14,55 +14,13 @@ #define QMCPLUSPLUS_MCCOORDS_HPP #include "Configuration.h" -#include "type_traits/complex_help.hpp" -#include - -#include +#include "MCCoordsT.hpp" namespace qmcplusplus { -enum class CoordsType 
-{ - POS, - POS_SPIN -}; - template -struct MCCoords; - -template<> -struct MCCoords -{ - MCCoords(const std::size_t size) : positions(size) {} - - MCCoords& operator+=(const MCCoords& rhs); - - /** get subset of MCCoords - * [param,out] out - */ - void getSubset(const std::size_t offset, const std::size_t size, MCCoords& out) const; - - std::vector positions; -}; - -template<> -struct MCCoords -{ - MCCoords(const std::size_t size) : positions(size), spins(size) {} - - MCCoords& operator+=(const MCCoords& rhs); - - /** get subset of MCCoords - * [param,out] out - */ - void getSubset(const std::size_t offset, const std::size_t size, MCCoords& out) const; - - std::vector positions; - std::vector spins; -}; +using MCCoords = MCCoordsT; -extern template struct MCCoords; -extern template struct MCCoords; } // namespace qmcplusplus #endif diff --git a/src/Particle/MCCoordsT.cpp b/src/Particle/MCCoordsT.cpp new file mode 100644 index 0000000000..063641f438 --- /dev/null +++ b/src/Particle/MCCoordsT.cpp @@ -0,0 +1,62 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "MCCoordsT.hpp" + +namespace qmcplusplus +{ +template +void MCCoordsT::getSubset(const std::size_t offset, + const std::size_t size, + MCCoordsT& out) const +{ + std::copy_n(positions.begin() + offset, size, out.positions.begin()); +} + +template +MCCoordsT& MCCoordsT::operator+=(const MCCoordsT& rhs) +{ + assert(positions.size() == rhs.positions.size()); + std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), + [](const PosType& x, const PosType& y) { return x + y; }); + return *this; +} + +template +void MCCoordsT::getSubset(const std::size_t offset, + const std::size_t size, + MCCoordsT& out) const +{ + std::copy_n(positions.begin() + offset, size, out.positions.begin()); + std::copy_n(spins.begin() + offset, size, out.spins.begin()); +} + +template +MCCoordsT& MCCoordsT::operator+=( + const MCCoordsT& rhs) +{ + assert(positions.size() == rhs.positions.size()); + std::transform(positions.begin(), positions.end(), rhs.positions.begin(), positions.begin(), + [](const PosType& x, const PosType& y) { return x + y; }); + std::transform(spins.begin(), spins.end(), rhs.spins.begin(), spins.begin(), + [](const FullPrecRealType& x, const FullPrecRealType& y) { return x + y; }); + return *this; +} + +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT; +template struct MCCoordsT, CoordsType::POS>; +template struct MCCoordsT, CoordsType::POS_SPIN>; +template struct MCCoordsT, CoordsType::POS>; +template struct MCCoordsT, CoordsType::POS_SPIN>; +} // namespace qmcplusplus diff --git a/src/Particle/MCCoordsT.hpp b/src/Particle/MCCoordsT.hpp new file mode 100644 index 0000000000..52c04b1ea5 --- /dev/null +++ 
b/src/Particle/MCCoordsT.hpp @@ -0,0 +1,70 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +// Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// +// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_MCCOORDST_HPP +#define QMCPLUSPLUS_MCCOORDST_HPP + +#include "ParticleSetTraits.h" +#include "type_traits/complex_help.hpp" + +#include +#include + +namespace qmcplusplus +{ +enum class CoordsType +{ + POS, + POS_SPIN +}; + +template +struct MCCoordsT; + +template +struct MCCoordsT +{ + using PosType = typename ParticleSetTraits::PosType; + + MCCoordsT(const std::size_t size) : positions(size) {} + + MCCoordsT& operator+=(const MCCoordsT& rhs); + + /** get subset of MCCoordsT + * [param,out] out + */ + void getSubset(const std::size_t offset, const std::size_t size, MCCoordsT& out) const; + + std::vector positions; +}; + +template +struct MCCoordsT +{ + using PosType = typename ParticleSetTraits::PosType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + + MCCoordsT(const std::size_t size) : positions(size), spins(size) {} + + MCCoordsT& operator+=(const MCCoordsT& rhs); + + /** get subset of MCCoordsT + * [param,out] out + */ + void getSubset(const std::size_t offset, const std::size_t size, MCCoordsT& out) const; + + std::vector positions; + std::vector spins; +}; +} // namespace qmcplusplus + +#endif diff --git a/src/Particle/MCWalkerConfiguration.h b/src/Particle/MCWalkerConfiguration.h index c32db3f0fb..8e3daf2fab 100644 --- a/src/Particle/MCWalkerConfiguration.h +++ 
b/src/Particle/MCWalkerConfiguration.h @@ -22,158 +22,13 @@ */ #ifndef QMCPLUSPLUS_MCWALKERCONFIGURATION_H #define QMCPLUSPLUS_MCWALKERCONFIGURATION_H -#include "Particle/ParticleSet.h" -#include "Particle/WalkerConfigurations.h" -#include "Particle/Walker.h" -#include "Particle/SampleStack.h" -#include "Utilities/IteratorUtility.h" -namespace qmcplusplus -{ -//Forward declaration -class MultiChain; -class HDFWalkerOutput; -class Reptile; +#include "Configuration.h" +#include "Particle/MCWalkerConfigurationT.h" -/** A set of walkers that are to be advanced by Metropolis Monte Carlo. - * - *As a derived class from ParticleSet, MCWalkerConfiguration interacts with - *QMCHamiltonian and TrialWaveFunction as a ParticleSet, while QMCDrivers - *use it as multiple walkers whose configurations are advanced according - to MC algorithms. - * - Each walker is represented by Walker and - *MCWalkerConfiguration contains a list of - *the walkers. This class enables two possible moves: - *
<ul>
- *<li> move the entire active walkers, similarly to molecu. Suitable for - *small and big systems with a small time step.
- *<li> move a particle for each walker. Suitable for large systems. - </ul>
- */ -class MCWalkerConfiguration : public ParticleSet, public WalkerConfigurations +namespace qmcplusplus { -public: - /**enumeration for update*/ - enum - { - Update_All = 0, ///move all the active walkers - Update_Walker, ///move a walker by walker - Update_Particle ///move a particle by particle - }; - - using Walker_t = WalkerConfigurations::Walker_t; - ///container type of the Properties of a Walker - using PropertyContainer_t = Walker_t::PropertyContainer_t; - ///container type of Walkers - using WalkerList_t = std::vector>; - /// FIX: a type alias of iterator for an object should not be for just one of many objects it holds. - using iterator = WalkerList_t::iterator; - ///const_iterator of Walker container - using const_iterator = WalkerList_t::const_iterator; - - using ReptileList_t = UPtrVector; - - ///default constructor - MCWalkerConfiguration(const SimulationCell& simulation_cell, - const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); - - ///default constructor: copy only ParticleSet - MCWalkerConfiguration(const MCWalkerConfiguration& mcw); - ~MCWalkerConfiguration(); - /** create numWalkers Walkers - * - * Append Walkers to WalkerList. 
- */ - void createWalkers(int numWalkers); - ///clean up the walker list and make a new list - void resize(int numWalkers, int numPtcls); - - ///clean up the walker list - using WalkerConfigurations::clear; - ///resize Walker::PropertyHistory and Walker::PHindex: - void resizeWalkerHistories(); - - ///make random moves for all the walkers - //void sample(iterator first, iterator last, value_type tauinv); - ///make a random move for a walker - void sample(iterator it, RealType tauinv); - - ///return the number of particles per walker - inline int getParticleNum() const { return R.size(); } - /**@}*/ - - /** set LocalEnergy - * @param e current average Local Energy - */ - inline void setLocalEnergy(RealType e) { LocalEnergy = e; } - - /** return LocalEnergy - */ - inline RealType getLocalEnergy() const { return LocalEnergy; } - - inline MultiChain* getPolymer() { return Polymer; } - - inline void setPolymer(MultiChain* chain) { Polymer = chain; } - - void resetWalkerProperty(int ncopy = 1); - - inline bool updatePbyP() const { return ReadyForPbyP; } - - //@{save/load/clear function for optimization - // - int numSamples() const { return samples.getNumSamples(); } - ///set the number of max samples - void setNumSamples(int n); - ///save the position of current walkers to SampleStack - void saveEnsemble(); - ///save the position of current walkers - void saveEnsemble(iterator first, iterator last); - /// load a single sample from SampleStack - void loadSample(ParticleSet& pset, size_t iw) const; - /// load SampleStack data to the current list of walker configurations - void loadEnsemble(); - /// load the SampleStacks of others to the current list of walker configurations - void loadEnsemble(std::vector& others, bool doclean = true); - /** dump Samples to a file - * @param others MCWalkerConfigurations whose samples will be collected - * @param out engine to write the samples to state_0/walkers - * @param np number of processors - * @return true with non-zero samples - 
* - * CAUTION: The current implementation assumes the same amount of active walkers on all the MPI ranks. - */ - static bool dumpEnsemble(std::vector& others, HDFWalkerOutput& out, int np, int nBlock); - ///clear the ensemble - void clearEnsemble(); - - const SampleStack& getSampleStack() const { return samples; } - SampleStack& getSampleStack() { return samples; } - - /// Transitional forwarding methods - int getMaxSamples() const; - //@} - -protected: - ///true if the buffer is ready for particle-by-particle updates - bool ReadyForPbyP; - ///update-mode index - int UpdateMode; - - RealType LocalEnergy; - -public: - ///a collection of reptiles contained in MCWalkerConfiguration. - ReptileList_t ReptileList; - Reptile* reptile; - - friend class MCPopulation; - -private: - MultiChain* Polymer; +using MCWalkerConfiguration = MCWalkerConfigurationT; - SampleStack samples; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/MCWalkerConfiguration.cpp b/src/Particle/MCWalkerConfigurationT.cpp similarity index 52% rename from src/Particle/MCWalkerConfiguration.cpp rename to src/Particle/MCWalkerConfigurationT.cpp index be957e6dff..8762d9d7d4 100644 --- a/src/Particle/MCWalkerConfiguration.cpp +++ b/src/Particle/MCWalkerConfigurationT.cpp @@ -16,65 +16,71 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "MCWalkerConfigurationT.h" -#include "MCWalkerConfiguration.h" -#include "ParticleBase/RandomSeqGenerator.h" -#include "Message/Communicate.h" -#include "Message/CommOperators.h" -#include "Utilities/IteratorUtility.h" #include "LongRange/StructFact.h" +#include "Message/CommOperators.h" +#include "Message/Communicate.h" #include "Particle/HDFWalkerOutput.h" #include "Particle/MCSample.h" -#include "Particle/Reptile.h" -#include "hdf/hdf_hyperslab.h" +#include "Particle/ReptileT.h" +#include 
"ParticleBase/RandomSeqGenerator.h" +#include "Utilities/IteratorUtility.h" #include "hdf/HDFVersion.h" +#include "hdf/hdf_hyperslab.h" + #include namespace qmcplusplus { -MCWalkerConfiguration::MCWalkerConfiguration(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind) - : ParticleSet(simulation_cell, kind), ReadyForPbyP(false), UpdateMode(Update_Walker), reptile(0), Polymer(0) +template +MCWalkerConfigurationT::MCWalkerConfigurationT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind) + : ParticleSetT(simulation_cell, kind), ReadyForPbyP(false), UpdateMode(Update_Walker), reptile(0), Polymer(0) {} -MCWalkerConfiguration::MCWalkerConfiguration(const MCWalkerConfiguration& mcw) - : ParticleSet(mcw), ReadyForPbyP(false), UpdateMode(Update_Walker), Polymer(0) +template +MCWalkerConfigurationT::MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw) + : ParticleSetT(mcw), ReadyForPbyP(false), UpdateMode(Update_Walker), Polymer(0) { samples.clearEnsemble(); samples.setMaxSamples(mcw.getMaxSamples()); - setWalkerOffsets(mcw.getWalkerOffsets()); - Properties = mcw.Properties; + this->setWalkerOffsets(mcw.getWalkerOffsets()); + this->Properties = mcw.Properties; } -MCWalkerConfiguration::~MCWalkerConfiguration() = default; +template +MCWalkerConfigurationT::~MCWalkerConfigurationT() = default; -void MCWalkerConfiguration::createWalkers(int n) +template +void MCWalkerConfigurationT::createWalkers(int n) { - const int old_nw = getActiveWalkers(); - WalkerConfigurations::createWalkers(n, TotalNum); + const int old_nw = this->getActiveWalkers(); + WalkerConfigurationsT::createWalkers(n, this->TotalNum); // no pre-existing walkers, need to initialized based on particleset. 
if (old_nw == 0) - for (auto& awalker : walker_list_) + for (auto& awalker : this->walker_list_) { - awalker->R = R; - awalker->spins = spins; + awalker->R = this->R; + awalker->spins = this->spins; } resizeWalkerHistories(); } - -void MCWalkerConfiguration::resize(int numWalkers, int numPtcls) +template +void MCWalkerConfigurationT::resize(int numWalkers, int numPtcls) { - if (TotalNum && walker_list_.size()) + if (this->TotalNum && this->walker_list_.size()) app_warning() << "MCWalkerConfiguration::resize cleans up the walker list." << std::endl; - const int old_nw = getActiveWalkers(); - ParticleSet::resize(unsigned(numPtcls)); - WalkerConfigurations::resize(numWalkers, TotalNum); + const int old_nw = this->getActiveWalkers(); + ParticleSetT::resize(unsigned(numPtcls)); + WalkerConfigurationsT::resize(numWalkers, this->TotalNum); // no pre-existing walkers, need to initialized based on particleset. if (old_nw == 0) - for (auto& awalker : walker_list_) + for (auto& awalker : this->walker_list_) { - awalker->R = R; - awalker->spins = spins; + awalker->R = this->R; + awalker->spins = this->spins; } } @@ -84,7 +90,8 @@ void MCWalkerConfiguration::resize(int numWalkers, int numPtcls) * * R + D + X */ -void MCWalkerConfiguration::sample(iterator it, RealType tauinv) +template +void MCWalkerConfigurationT::sample(iterator it, RealType tauinv) { throw std::runtime_error("MCWalkerConfiguration::sample obsolete"); // makeGaussRandom(R); @@ -94,24 +101,28 @@ void MCWalkerConfiguration::sample(iterator it, RealType tauinv) /** reset the Property container of all the walkers */ -void MCWalkerConfiguration::resetWalkerProperty(int ncopy) +template +void MCWalkerConfigurationT::resetWalkerProperty(int ncopy) { - int m(PropertyList.size()); + int m(this->PropertyList.size()); app_log() << " Resetting Properties of the walkers " << ncopy << " x " << m << std::endl; try { - Properties.resize(ncopy, m); + this->Properties.resize(ncopy, m); } catch (std::domain_error& de) { 
app_error() << de.what() << '\n' - << "This is likely because some object has attempted to add walker properties\n" + << "This is likely because some object has attempted to " + "add walker properties\n" << " in excess of WALKER_MAX_PROPERTIES.\n" - << "build with cmake ... -DWALKER_MAX_PROPERTIES=at_least_properties_required" << std::endl; + << "build with cmake ... " + "-DWALKER_MAX_PROPERTIES=at_least_properties_required" + << std::endl; APP_ABORT("Fatal Exception"); } - for (auto& walker : walker_list_) + for (auto& walker : this->walker_list_) { walker->resizeProperty(ncopy, m); walker->Weight = 1.0; @@ -119,24 +130,26 @@ void MCWalkerConfiguration::resetWalkerProperty(int ncopy) resizeWalkerHistories(); } -void MCWalkerConfiguration::resizeWalkerHistories() +template +void MCWalkerConfigurationT::resizeWalkerHistories() { - //using std::vector > is too costly. - int np = PropertyHistory.size(); + // using std::vector > is too costly. + int np = this->PropertyHistory.size(); if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PropertyHistory = PropertyHistory; - np = PHindex.size(); + for (int iw = 0; iw < this->walker_list_.size(); ++iw) + this->walker_list_[iw]->PropertyHistory = this->PropertyHistory; + np = this->PHindex.size(); if (np) - for (int iw = 0; iw < walker_list_.size(); ++iw) - walker_list_[iw]->PHindex = PHindex; + for (int iw = 0; iw < this->walker_list_.size(); ++iw) + this->walker_list_[iw]->PHindex = this->PHindex; ; } /** allocate the SampleStack * @param n number of samples per thread */ -void MCWalkerConfiguration::setNumSamples(int n) +template +void MCWalkerConfigurationT::setNumSamples(int n) { samples.clearEnsemble(); samples.setMaxSamples(n); @@ -144,11 +157,16 @@ void MCWalkerConfiguration::setNumSamples(int n) /** save the current walkers to SampleStack */ -void MCWalkerConfiguration::saveEnsemble() { saveEnsemble(walker_list_.begin(), walker_list_.end()); } +template +void 
MCWalkerConfigurationT::saveEnsemble() +{ + saveEnsemble(this->walker_list_.begin(), this->walker_list_.end()); +} /** save the [first,last) walkers to SampleStack */ -void MCWalkerConfiguration::saveEnsemble(iterator first, iterator last) +template +void MCWalkerConfigurationT::saveEnsemble(iterator first, iterator last) { for (; first != last; first++) { @@ -157,35 +175,41 @@ void MCWalkerConfiguration::saveEnsemble(iterator first, iterator last) } /** load a single sample from SampleStack */ -void MCWalkerConfiguration::loadSample(ParticleSet& pset, size_t iw) const { samples.loadSample(pset, iw); } +template +void MCWalkerConfigurationT::loadSample(ParticleSetT& pset, size_t iw) const +{ + samples.loadSample(pset, iw); +} /** load SampleStack to walker_list_ */ -void MCWalkerConfiguration::loadEnsemble() +template +void MCWalkerConfigurationT::loadEnsemble() { using WP = WalkerProperties::Indexes; int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples()); if (samples.empty() || nsamples == 0) return; - Walker_t::PropertyContainer_t prop(1, PropertyList.size(), 1, WP::MAXPROPERTIES); - walker_list_.resize(nsamples); + typename Walker_t::PropertyContainer_t prop(1, this->PropertyList.size(), 1, WP::MAXPROPERTIES); + this->walker_list_.resize(nsamples); for (int i = 0; i < nsamples; ++i) { - auto awalker = std::make_unique(TotalNum); + auto awalker = std::make_unique(this->TotalNum); awalker->Properties.copy(prop); samples.getSample(i).convertToWalker(*awalker); - walker_list_[i] = std::move(awalker); + this->walker_list_[i] = std::move(awalker); } resizeWalkerHistories(); samples.clearEnsemble(); } -bool MCWalkerConfiguration::dumpEnsemble(std::vector& others, - HDFWalkerOutput& out, - int np, - int nBlock) +template +bool MCWalkerConfigurationT::dumpEnsemble(std::vector*>& others, + HDFWalkerOutput& out, + int np, + int nBlock) { - WalkerConfigurations wctemp; + WalkerConfigurationsT wctemp; for (auto* mcwc : others) { const auto& 
astack(mcwc->getSampleStack()); @@ -203,7 +227,8 @@ bool MCWalkerConfiguration::dumpEnsemble(std::vector& ot if (w == 0) return false; - // The following code assumes the same amount of active walkers on all the MPI ranks + // The following code assumes the same amount of active walkers on all the + // MPI ranks std::vector nwoff(np + 1, 0); for (int ip = 0; ip < np; ++ip) nwoff[ip + 1] = nwoff[ip] + w; @@ -212,9 +237,14 @@ bool MCWalkerConfiguration::dumpEnsemble(std::vector& ot return true; } -int MCWalkerConfiguration::getMaxSamples() const { return samples.getMaxSamples(); } +template +int MCWalkerConfigurationT::getMaxSamples() const +{ + return samples.getMaxSamples(); +} -void MCWalkerConfiguration::loadEnsemble(std::vector& others, bool doclean) +template +void MCWalkerConfigurationT::loadEnsemble(std::vector*>& others, bool doclean) { using WP = WalkerProperties::Indexes; std::vector off(others.size() + 1, 0); @@ -225,19 +255,19 @@ void MCWalkerConfiguration::loadEnsemble(std::vector& ot int nw_tot = off.back(); if (nw_tot) { - Walker_t::PropertyContainer_t prop(1, PropertyList.size(), 1, WP::MAXPROPERTIES); - while (walker_list_.size()) - pop_back(); - walker_list_.resize(nw_tot); + typename Walker_t::PropertyContainer_t prop(1, this->PropertyList.size(), 1, WP::MAXPROPERTIES); + while (this->walker_list_.size()) + this->pop_back(); + this->walker_list_.resize(nw_tot); for (int i = 0; i < others.size(); ++i) { - SampleStack& astack(others[i]->getSampleStack()); + SampleStackT& astack(others[i]->getSampleStack()); for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw) { - auto awalker = std::make_unique(TotalNum); + auto awalker = std::make_unique(this->TotalNum); awalker->Properties.copy(prop); astack.getSample(j).convertToWalker(*awalker); - walker_list_[iw] = std::move(awalker); + this->walker_list_[iw] = std::move(awalker); } if (doclean) others[i]->clearEnsemble(); @@ -247,6 +277,24 @@ void MCWalkerConfiguration::loadEnsemble(std::vector& ot 
resizeWalkerHistories(); } -void MCWalkerConfiguration::clearEnsemble() { samples.clearEnsemble(); } +template +void MCWalkerConfigurationT::clearEnsemble() +{ + samples.clearEnsemble(); +} + +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class MCWalkerConfigurationT; +#else +template class MCWalkerConfigurationT; +#endif +#else +#ifndef MIXED_PRECISION +template class MCWalkerConfigurationT>; +#else +template class MCWalkerConfigurationT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/Particle/MCWalkerConfigurationT.h b/src/Particle/MCWalkerConfigurationT.h new file mode 100644 index 0000000000..dc4b49fbcc --- /dev/null +++ b/src/Particle/MCWalkerConfigurationT.h @@ -0,0 +1,185 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file MCWalkerConfiguration.h + * @brief Declaration of a MCWalkerConfiguration + */ +#ifndef QMCPLUSPLUS_MCWALKERCONFIGURATIONT_H +#define QMCPLUSPLUS_MCWALKERCONFIGURATIONT_H +#include "Particle/ParticleSetT.h" +#include "Particle/SampleStackT.h" +#include "Particle/Walker.h" +#include "Particle/WalkerConfigurationsT.h" +#include "Utilities/IteratorUtility.h" + +namespace qmcplusplus +{ +// Forward declaration +class MultiChain; +class HDFWalkerOutput; +template +class ReptileT; + +/** A set of walkers that are to be advanced by Metropolis Monte Carlo. + * + *As a derived class from ParticleSet, MCWalkerConfiguration interacts with + *QMCHamiltonian and TrialWaveFunction as a ParticleSet, while QMCDrivers + *use it as multiple walkers whose configurations are advanced according + to MC algorithms. + * + Each walker is represented by Walker and + *MCWalkerConfiguration contains a list of + *the walkers. This class enables two possible moves: + *
    + *
  • move the entire active walkers, similarly to molecu. Suitable for + *small and big systems with a small time step. + *
  • move a particle for each walker. Suitable for large systems. + + *
+ */ +template +class MCWalkerConfigurationT : public ParticleSetT, public WalkerConfigurationsT +{ +public: + /**enumeration for update*/ + enum + { + Update_All = 0, /// move all the active walkers + Update_Walker, /// move a walker by walker + Update_Particle /// move a particle by particle + }; + + using Walker_t = typename WalkerConfigurationsT::Walker_t; + /// container type of the Properties of a Walker + using PropertyContainer_t = typename Walker_t::PropertyContainer_t; + /// container type of Walkers + using WalkerList_t = std::vector>; + /// FIX: a type alias of iterator for an object should not be for just one + /// of many objects it holds. + using iterator = typename WalkerList_t::iterator; + /// const_iterator of Walker container + using const_iterator = typename WalkerList_t::const_iterator; + + using ReptileList_t = UPtrVector>; + + using RealType = typename ParticleSetT::RealType; + + /// default constructor + MCWalkerConfigurationT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); + + /// default constructor: copy only ParticleSet + MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw); + ~MCWalkerConfigurationT(); + /** create numWalkers Walkers + * + * Append Walkers to WalkerList. 
+ */ + void createWalkers(int numWalkers); + /// clean up the walker list and make a new list + void resize(int numWalkers, int numPtcls); + + /// clean up the walker list + using WalkerConfigurationsT::clear; + /// resize Walker::PropertyHistory and Walker::PHindex: + void resizeWalkerHistories(); + + /// make random moves for all the walkers + // void sample(iterator first, iterator last, value_type tauinv); + /// make a random move for a walker + void sample(iterator it, RealType tauinv); + + /// return the number of particles per walker + inline int getParticleNum() const { return this->R.size(); } + /**@}*/ + + /** set LocalEnergy + * @param e current average Local Energy + */ + inline void setLocalEnergy(RealType e) { LocalEnergy = e; } + + /** return LocalEnergy + */ + inline RealType getLocalEnergy() const { return LocalEnergy; } + + inline MultiChain* getPolymer() { return Polymer; } + + inline void setPolymer(MultiChain* chain) { Polymer = chain; } + + void resetWalkerProperty(int ncopy = 1); + + inline bool updatePbyP() const { return ReadyForPbyP; } + + //@{save/load/clear function for optimization + // + int numSamples() const { return samples.getNumSamples(); } + /// set the number of max samples + void setNumSamples(int n); + /// save the position of current walkers to SampleStack + void saveEnsemble(); + /// save the position of current walkers + void saveEnsemble(iterator first, iterator last); + /// load a single sample from SampleStack + void loadSample(ParticleSetT& pset, size_t iw) const; + /// load SampleStack data to the current list of walker configurations + void loadEnsemble(); + /// load the SampleStacks of others to the current list of walker + /// configurations + void loadEnsemble(std::vector*>& others, bool doclean = true); + /** dump Samples to a file + * @param others MCWalkerConfigurations whose samples will be collected + * @param out engine to write the samples to state_0/walkers + * @param np number of processors + * @return 
true with non-zero samples + * + * CAUTION: The current implementation assumes the same amount of active + * walkers on all the MPI ranks. + */ + static bool dumpEnsemble(std::vector*>& others, HDFWalkerOutput& out, int np, int nBlock); + /// clear the ensemble + void clearEnsemble(); + + const SampleStackT& getSampleStack() const { return samples; } + SampleStackT& getSampleStack() { return samples; } + + /// Transitional forwarding methods + int getMaxSamples() const; + //@} + +protected: + /// true if the buffer is ready for particle-by-particle updates + bool ReadyForPbyP; + /// update-mode index + int UpdateMode; + + RealType LocalEnergy; + +public: + /// a collection of reptiles contained in MCWalkerConfiguration. + ReptileList_t ReptileList; + ReptileT* reptile; + + friend class MCPopulation; + +private: + MultiChain* Polymer; + + SampleStackT samples; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/PSdispatcher.cpp b/src/Particle/PSdispatcher.cpp index 44dbb23965..d86e8fd8a6 100644 --- a/src/Particle/PSdispatcher.cpp +++ b/src/Particle/PSdispatcher.cpp @@ -64,7 +64,7 @@ void PSdispatcher::flex_accept_rejectMove(const RefVectorWithLeader bool forward_mode) const { if (use_batch_) - ParticleSet::mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); + ParticleSet::mw_accept_rejectMoveT(p_list, iat, isAccepted, forward_mode); else for (size_t iw = 0; iw < p_list.size(); iw++) p_list[iw].accept_rejectMove(iat, isAccepted[iw], forward_mode); diff --git a/src/Particle/PSdispatcher.h b/src/Particle/PSdispatcher.h index 50b03c13a6..36e714adb4 100644 --- a/src/Particle/PSdispatcher.h +++ b/src/Particle/PSdispatcher.h @@ -13,6 +13,7 @@ #ifndef QMCPLUSPLUS_PSDISPATCH_H #define QMCPLUSPLUS_PSDISPATCH_H +#include "MCCoords.hpp" #include "ParticleSet.h" namespace qmcplusplus diff --git a/src/Particle/ParticleIO/LatticeIO.cpp b/src/Particle/ParticleIO/LatticeIO.cpp index 2d8ea238e0..0fe1756969 100644 --- a/src/Particle/ParticleIO/LatticeIO.cpp +++ 
b/src/Particle/ParticleIO/LatticeIO.cpp @@ -233,6 +233,216 @@ bool LatticeParser::put(xmlNodePtr cur) } +template +bool LatticeParserT::put(xmlNodePtr cur) +{ + const int DIM = ParticleLayout::SingleParticlePos::Size; + double a0 = 1.0; + double rs = -1.0; + int nptcl = 0; + int nsh = 0; //for backwards compatibility w/ odd heg initialization style + int pol = 0; + using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex; + TinyVector bconds("p"); + + Tensor lattice_in; + bool lattice_defined = false; + bool bconds_defined = false; + int boxsum = 0; + + app_summary() << std::endl; + app_summary() << " Lattice" << std::endl; + app_summary() << " -------" << std::endl; + cur = cur->xmlChildrenNode; + while (cur != NULL) + { + std::string cname((const char*)cur->name); + if (cname == "parameter") + { + const std::string aname(getXMLAttributeValue(cur, "name")); + if (aname == "scale") + { + putContent(a0, cur); + } + else if (aname == "lattice") + { + const std::string units_prop(getXMLAttributeValue(cur, "units")); + if (!units_prop.empty() && units_prop != "bohr") + { + std::ostringstream err_msg; + err_msg << "LatticeParser::put. Only atomic units (bohr) supported for lattice units. Input file uses: " + << units_prop; + throw UniformCommunicateError(err_msg.str()); + } + + putContent(lattice_in, cur); + lattice_defined = true; + //putContent(ref_.R,cur); + } + else if (aname == "bconds") + { + putContent(bconds, cur); + bconds_defined = true; + for (int idir = 0; idir < DIM; idir++) + { + char b = bconds[idir][0]; + if (b == 'n' || b == 'N') + { + ref_.BoxBConds[idir] = false; + } + else if (b == 'p' || b == 'P') + { + ref_.BoxBConds[idir] = true; + boxsum++; + } + else + { + std::ostringstream err_msg; + err_msg << "LatticeParser::put. Unknown label '" + bconds[idir] + + "' used for periodicity. Only 'p', 'P', 'n' and 'N' are valid!"; + throw UniformCommunicateError(err_msg.str()); + } + + // Protect BCs which are not implemented. 
+ if (idir > 0 && !ref_.BoxBConds[idir - 1] && ref_.BoxBConds[idir]) + { + std::ostringstream err_msg; + err_msg + << "LatticeParser::put. In \"bconds\", non periodic directions must be placed after the periodic ones."; + throw UniformCommunicateError(err_msg.str()); + } + } + } + else if (aname == "vacuum") + { + putContent(ref_.VacuumScale, cur); + } + else if (aname == "LR_dim_cutoff") + { + putContent(ref_.LR_dim_cutoff, cur); + } + else if (aname == "LR_handler") + { + std::string handler_type("opt_breakup"); + //This chops whitespace so the simple str == comparisons work + putContent(handler_type, cur); + handler_type = lowerCase(handler_type); + if (handler_type == "ewald") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::EWALD; + else if (handler_type == "opt_breakup") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::ESLER; + else if (handler_type == "opt_breakup_original") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::NATOLI; + else if (handler_type == "ewald_strict2d") + { + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::STRICT2D; + ref_.ndim = 2; + } + else if (handler_type == "ewald_quasi2d") + LRCoulombSingleton::this_lr_type = LRCoulombSingleton::QUASI2D; + else + throw UniformCommunicateError("LatticeParser::put. Long range breakup handler not recognized."); + } + else if (aname == "LR_tol") + { + putContent(ref_.LR_tol, cur); + } + else if (aname == "rs") + { + lattice_defined = true; + OhmmsAttributeSet rAttrib; + rAttrib.add(nptcl, "condition"); + rAttrib.add(pol, "polarized"); + rAttrib.add(nsh, "shell"); + rAttrib.put(cur); + putContent(rs, cur); + } + else if (aname == "nparticles") + { + putContent(nptcl, cur); + } + } + cur = cur->next; + } + + // checking boundary conditions + if (lattice_defined) + { + if (!bconds_defined) + { + app_log() << " Lattice is specified but boundary conditions are not. Assuming PBC." 
<< std::endl; + ref_.BoxBConds = true; + } + } + else if (boxsum == 0) + app_log() << " Lattice is not specified for the Open BC. Add a huge box." << std::endl; + else + throw UniformCommunicateError("LatticeParser::put. Mixed boundary is supported only when a lattice is specified!"); + + //special heg processing + if (rs > 0.0) + { + HEGGrid heg(ref_); + if (pol == 0) + { + if (nsh > 0) + nptcl = 2 * heg.getNumberOfKpoints(nsh); + else + nsh = heg.getShellIndex(nptcl / 2); + } + else + { // spin polarized + if (nsh > 0) + nptcl = heg.getNumberOfKpoints(nsh); + else + nsh = heg.getShellIndex(nptcl); + } + typename ParticleLayout::Scalar_t acubic = heg.getCellLength(nptcl, rs); + app_log() << " " << OHMMS_DIM << "D HEG system" + << "\n rs = " << rs; + if (pol == 0) + { + app_log() << "\n number of up particles = " << nptcl / 2 << "\n number of dn particles = " << nptcl / 2; + } + else + { + app_log() << "\n number of up particles = " << nptcl; + } + app_log() << "\n filled kshells = " << nsh << "\n lattice constant = " << acubic << " bohr" + << std::endl; + lattice_in = 0.0; + for (int idim = 0; idim < DIM; idim++) + lattice_in(idim, idim) = acubic; + a0 = 1.0; + } + + if (lattice_defined) + { + lattice_in *= a0; + ref_.set(lattice_in); + } + + if (ref_.SuperCellEnum != SUPERCELL_SLAB && LRCoulombSingleton::isQuasi2D()) + throw UniformCommunicateError("LatticeParser::put. 
Quasi 2D Ewald only works with boundary condition 'p p n'!"); + + if (ref_.SuperCellEnum == SUPERCELL_OPEN) + ref_.WignerSeitzRadius = ref_.SimulationCellRadius; + + std::string unit_name = "bohr"; + app_log() << std::fixed; + app_log() << " Simulation cell radius = " << ref_.SimulationCellRadius << " " << unit_name << std::endl; + app_log() << " Wigner-Seitz cell radius = " << ref_.WignerSeitzRadius << " " << unit_name << std::endl; + app_log() << std::endl; + + return lattice_defined; +} + +template class LatticeParserT; +template class LatticeParserT; +template class LatticeParserT>; +template class LatticeParserT>; + + bool LatticeXMLWriter::get(std::ostream& os) const { os << "" << std::endl; diff --git a/src/Particle/ParticleIO/LatticeIO.h b/src/Particle/ParticleIO/LatticeIO.h index a52e17858d..41e3da8790 100644 --- a/src/Particle/ParticleIO/LatticeIO.h +++ b/src/Particle/ParticleIO/LatticeIO.h @@ -17,6 +17,7 @@ #include "OhmmsData/OhmmsElementBase.h" #include "Configuration.h" +#include "ParticleSetTraits.h" namespace qmcplusplus { @@ -31,6 +32,18 @@ class LatticeParser }; +template +class LatticeParserT +{ + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + ParticleLayout& ref_; + +public: + LatticeParserT(ParticleLayout& lat) : ref_(lat) {} + bool put(xmlNodePtr cur); +}; + + class LatticeXMLWriter { using ParticleLayout = PtclOnLatticeTraits::ParticleLayout; diff --git a/src/Particle/ParticleIO/XMLParticleIO.cpp b/src/Particle/ParticleIO/XMLParticleIO.cpp index 26b9d658e7..ee0cfba8f2 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.cpp +++ b/src/Particle/ParticleIO/XMLParticleIO.cpp @@ -487,6 +487,405 @@ void XMLParticleParser::getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_si } + +template +XMLParticleParserT::XMLParticleParserT(Particle_t& aptcl) : ref_(aptcl) +{ + //add ref particle attributes + ref_.createAttributeList(ref_AttribList); +} + +/** process xmlnode <particleset/> which contains everything about the particle 
set to initialize + *@param cur the xmlnode to work on + * + */ +template +bool XMLParticleParserT::readXML(xmlNodePtr cur) +{ + ReportEngine PRE("XMLParticleParser", "readXML"); + + if (ref_.getTotalNum()) + throw UniformCommunicateError("The ParticleSet object to load XML input was not empty. Report a bug!"); + + SpeciesSet& tspecies(ref_.getSpeciesSet()); + if (tspecies.size() != 0) + throw UniformCommunicateError("The SpeciesSet object to load XML input was not empty. Report a bug!"); + + // the total number of particles, once it is set non-zero, always check against it. + int nat = 0; + // the number of particles by group, once it is constructed, always check against it. + std::vector nat_group; + + std::string pname("none"); + std::string randomizeR("no"); + OhmmsAttributeSet pAttrib; + pAttrib.add(randomizeR, "random"); + pAttrib.add(nat, "size"); + pAttrib.add(pname, "name"); + pAttrib.put(cur); + + ref_.setName(pname.c_str()); + + if (nat != 0) + { + app_debug() << "Set the total size " << nat + << " by the 'size' attribute found in 'particleset' XML element node named '" << pname << "'." 
+ << std::endl; + } + + bool ionid_found = false; + { // parse all the 'group's to obtain or verify the total number of particles + //total count of the particles to be created + int ntot = 0; + int num_non_zero_group = 0; + bool group_found = false; + + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "atom") + throw UniformCommunicateError("XML element node atom is no more supported"); + else if (cname.find("ell") < cname.size()) //accept UnitCell, unitcell, supercell + throw UniformCommunicateError("Constructing cell inside particleset is illegal!"); + else if (cname == "group") + { + group_found = true; + std::string sname = getXMLAttributeValue(element, "name"); + if (sname.empty()) + throw UniformCommunicateError("'group' element node must include a name attribute!"); + else + { + const int sid = tspecies.addSpecies(sname); + setSpeciesProperty(tspecies, sid, element); + } + + int nat_per_group = 0; + OhmmsAttributeSet gAttrib; + gAttrib.add(nat_per_group, "size"); + gAttrib.put(element); + + nat_group.push_back(nat_per_group); + ntot += nat_per_group; + if (nat_per_group > 0) + num_non_zero_group++; + } + else if (cname == attrib_tag && getXMLAttributeValue(element, "name") == ionid_tag) + ionid_found = true; + }); + + if (!group_found) + throw UniformCommunicateError("No 'group' XML element node was found. Check XML input!"); + + if (nat != 0 && ntot != 0 && nat != ntot) + { + std::ostringstream msg; + msg << "The total number of particles deterimined previously was " << nat + << "but the sum of the sizes from all the 'group' XML element nodes is " << ntot + << ". Please check the 'particleset' XML element node!" << std::endl; + throw UniformCommunicateError(msg.str()); + } + + if (nat == 0 && ntot != 0) + { + nat = ntot; + app_debug() << "Set the total size " << nat << " by the sum of the 'size's on all the 'group' XML element nodes." 
+ << std::endl; + } + + if (ntot > 0 && num_non_zero_group != nat_group.size()) + throw UniformCommunicateError("Some 'group' XML element node doesn't contain a 'size' attribute! 'size = 0' is not allowed in the input. Make appropriate adjustments to the input or converter."); + } + + { // parse all the 'attrib's to obtain or verify the total number of particles + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag) + { + std::string sname = getXMLAttributeValue(element, "name"); + if (sname.empty()) + throw UniformCommunicateError("'" + ParticleTags::attrib_tag + + "' XML element node must include a name attribute!"); + + int size_att = 0; + OhmmsAttributeSet aAttrib; + aAttrib.add(size_att, "size"); + aAttrib.put(element); + + if (nat != 0 && size_att != 0 && nat != size_att) + { + std::ostringstream msg; + msg << "The total number of particles deterimined previously was " << nat + << " but the 'size' atttribute found on the '" << ParticleTags::attrib_tag + << "' XML element nodes named '" << sname << "' is " << size_att + << ". Please check the 'particleset' XML element node!" << std::endl; + throw UniformCommunicateError(msg.str()); + } + + if (nat == 0 && size_att != 0) + { + nat = size_att; + app_debug() << "Set the total size " << nat << " by the 'size' on the '" << ParticleTags::attrib_tag + << "' XML element node named '" << sname << "'." << std::endl; + } + } + }); + } + + if (nat == 0) + throw UniformCommunicateError("Failed in figuring out the total number of particles. 
Check XML input!"); + + if (ionid_found) + { // parse ionid and construct input order to stored order + std::vector map_storage_to_input(nat); + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag && getXMLAttributeValue(element, "name") == ionid_tag) + { + std::string datatype = getXMLAttributeValue(element, datatype_tag); + if (datatype != stringtype_tag) + throw UniformCommunicateError("'ionid' only supports datatype=\"" + stringtype_tag + "\""); + std::vector d_in(nat); + putContent(d_in, element); + bool input_ungrouped = false; + int storage_index = 0; + for (int ig = 0; ig < nat_group.size(); ig++) + { + const auto& group_species_name = tspecies.getSpeciesName(ig); + int count_group_size = 0; + for (int iat = 0; iat < nat; iat++) + { + const int element_index = tspecies.findSpecies(d_in[iat]); + if (element_index == tspecies.size()) + throw UniformCommunicateError("Element " + d_in[iat] + + " doesn't match any species from 'group' XML element nodes."); + if (element_index == ig) + { + if (iat != storage_index) + input_ungrouped = true; + count_group_size++; + map_storage_to_input[storage_index++] = iat; + } + } + + if (count_group_size == 0) + throw UniformCommunicateError("Element '" + group_species_name + "' not found in 'ionid'."); + + if (nat_group[ig] == 0) + nat_group[ig] = count_group_size; + else if (nat_group[ig] != count_group_size) + { + std::ostringstream msg; + msg << "The number of particles of element '" << group_species_name << "' from 'group' XML elment node was " + << nat_group[ig] << " but 'ionid' contains " << count_group_size << " entries." << std::endl; + throw UniformCommunicateError(msg.str()); + } + } + + if (input_ungrouped) + { + app_log() << " Input particle set is not grouped by species. Remapping particle position indices " + "internally." 
+ << std::endl; + app_debug() << " Species : input particle index -> internal particle index" << std::endl; + for (int new_idx = 0; new_idx < map_storage_to_input.size(); new_idx++) + { + int old_idx = map_storage_to_input[new_idx]; + if (new_idx != old_idx) + { + app_debug() << " " << d_in[old_idx] << " : " << old_idx << " -> " << new_idx << std::endl; + } + } + } + } + }); + + checkGrouping(nat, nat_group); + ref_.create(nat_group); + // save map_storage_to_input + ref_.setMapStorageToInput(map_storage_to_input); + + for (int iat = 0; iat < nat; iat++) + { + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag && getXMLAttributeValue(element, "name") != ionid_tag) + getPtclAttrib(element, map_storage_to_input[iat], 1, iat); + }); + } + } + else + { + // fix old input with positions outside 'group' + if (nat_group.size() == 1 && nat_group[0] == 0) + nat_group[0] = nat; + + checkGrouping(nat, nat_group); + ref_.create(nat_group); + + // obtain 'attrib' inside 'group' + size_t start = 0; + size_t ig = 0; + processChildren(cur, [&](const std::string& cname, const xmlNodePtr child) { + if (cname == "group") + { + processChildren(child, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == attrib_tag) + getPtclAttrib(element, 0, nat_group[ig], start); + }); + start += nat_group[ig]; + ig++; + } + else if (cname == attrib_tag) + { + if (nat_group.size() > 1) + throw UniformCommunicateError("An 'attrib' XML element node was found outside 'group'" + " without XML element node named 'ionid'." + " Cannot map particles to more than one species. 
Check XML input!"); + getPtclAttrib(child, 0, nat, 0); + } + }); + } + + if (ref_.getLattice().SuperCellEnum) + { + if (randomizeR == "yes") + { + makeUniformRandom(ref_.R); + ref_.R.setUnit(PosUnit::Lattice); + ref_.convert2Cart(ref_.R); + makeUniformRandom(ref_.spins); + ref_.spins *= 2 * M_PI; + } + else // put them [0,1) in the cell + ref_.applyBC(ref_.R); + } + + //this sets Mass, Z + ref_.resetGroups(); + ref_.createSK(); + + return true; +} + +template +void XMLParticleParserT::checkGrouping(int nat, const std::vector& nat_group) const +{ + app_debug() << "There are " << nat << " particles in " << nat_group.size() << " species containing:" << std::endl; + for (int ig = 0; ig < nat_group.size(); ig++) + { + const auto& group_species_name = ref_.getSpeciesSet().getSpeciesName(ig); + if (nat_group[ig] == 0) + throw UniformCommunicateError("Element '" + group_species_name + "' was provided but never referenced."); + app_debug() << " " << nat_group[ig] << " '" << group_species_name << "'" << std::endl; + } + + if (std::accumulate(nat_group.begin(), nat_group.end(), 0) != nat) + throw UniformCommunicateError( + "The total number of particles doesn't match the sum of the particle counts of all the species."); +} + +/** process xmlnode to reset the properties of a particle set + * @param cur current node + * @return true, if successful + * + * This resets or adds new attributes to a particle set. + * It cannot modify the size of the particle set. 
+ */ +template +bool XMLParticleParserT::reset(xmlNodePtr cur) +{ + ReportEngine PRE("XMLParticleParser", "reset"); + SpeciesSet& tspecies(ref_.getSpeciesSet()); + cur = cur->xmlChildrenNode; + while (cur != NULL) + { + std::string cname((const char*)cur->name); + if (cname == "group") + { + std::string sname; + OhmmsAttributeSet gAttrib; + gAttrib.add(sname, "name"); + gAttrib.put(cur); + if (sname.size()) + { + int sid = tspecies.addSpecies(sname); + setSpeciesProperty(tspecies, sid, cur); + } + } + cur = cur->next; + } + // //@todo Will add a member function to ParticleSet to handle these + // int massind=tspecies.addAttribute("mass"); + // for(int iat=0; iat +void XMLParticleParserT::getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_size, int out_offset) +{ + std::string oname, otype; + int utype = 0; + int size_in = 0; + OhmmsAttributeSet pAttrib; + pAttrib.add(otype, datatype_tag); //datatype + pAttrib.add(oname, "name"); //name + pAttrib.add(utype, condition_tag); //condition + pAttrib.add(size_in, "size"); //size + pAttrib.put(cur); + if (oname.empty() || otype.empty()) + { + app_error() << " Missing attrib/@name or attrib/@datatype " << std::endl; + app_error() << R"( )" << std::endl; + return; + } + int t_id = ref_AttribList.getAttribType(otype); + + if (oname == ionid_tag) + throw UniformCommunicateError("'ionid' should not be parsed by getPtclAttrib."); + else + { + //very permissive in that a unregistered attribute will be created and stored by ParticleSet + //cloning is not going to work + if (t_id == PA_IndexType) + { + ParticleIndex* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if (t_id == PA_ScalarType) + { + ParticleScalar* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if 
(t_id == PA_PositionType) + { + ParticlePos* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + else if (t_id == PA_TensorType) + { + ParticleTensor* obj = nullptr; + obj = ref_AttribList.getAttribute(otype, oname, obj); + ParticleAttribXmlNode a(*obj, static_cast(utype)); + a.put(cur, in_offset, copy_size, out_offset); + } + } +} + +#ifndef QMC_COMPLEX +template class XMLParticleParserT; +template class XMLParticleParserT; +#else +template class XMLParticleParserT>; +template class XMLParticleParserT>; +#endif + XMLSaveParticle::XMLSaveParticle(Particle_t& pin) : ref_(pin) {} XMLSaveParticle::~XMLSaveParticle() {} diff --git a/src/Particle/ParticleIO/XMLParticleIO.h b/src/Particle/ParticleIO/XMLParticleIO.h index c05590ea3c..11a0c41a87 100644 --- a/src/Particle/ParticleIO/XMLParticleIO.h +++ b/src/Particle/ParticleIO/XMLParticleIO.h @@ -18,6 +18,7 @@ #include "OhmmsData/OhmmsElementBase.h" #include "OhmmsData/RecordProperty.h" #include "Particle/ParticleSet.h" +#include "Particle/ParticleSetT.h" namespace qmcplusplus { @@ -139,6 +140,41 @@ class XMLParticleParser : public ParticleTags bool reset(xmlNodePtr cur); }; +template +class XMLParticleParserT : public ParticleTags +{ + using Particle_t = ParticleSetT; + using ParticleIndex = typename Particle_t::ParticleIndex; + using ParticleScalar = typename Particle_t::ParticleScalar; + using ParticlePos = typename Particle_t::ParticlePos; + using ParticleTensor = typename Particle_t::ParticleTensor; + + Particle_t& ref_; + AttribListType ref_AttribList; + + /** read the data of a particle attribute + *@param cur the xmlnode + *@param in_offset the location offset to read from XML element node body. + *@param copy_size the number of particle attributes to be read + *@param out_offset the current local count to which copy_size particle attributes are added. 
+ */ + void getPtclAttrib(xmlNodePtr cur, int in_offset, int copy_size, int out_offset); + + void checkGrouping(int nat, const std::vector& nat_group) const; + +public: + /**constructor + *@param aptcl the particleset to be initialized + */ + XMLParticleParserT(Particle_t& aptcl); + + bool readXML(xmlNodePtr cur); + + /** reset the properties of a particle set + */ + bool reset(xmlNodePtr cur); +}; + class XMLSaveParticle : public ParticleTags, public RecordProperty { using Particle_t = ParticleSet; diff --git a/src/Particle/ParticleSet.h b/src/Particle/ParticleSet.h index 247f5ca66a..8004876a02 100644 --- a/src/Particle/ParticleSet.h +++ b/src/Particle/ParticleSet.h @@ -19,682 +19,13 @@ #ifndef QMCPLUSPLUS_PARTICLESET_H #define QMCPLUSPLUS_PARTICLESET_H -#include -#include -#include "ParticleTags.h" -#include "DynamicCoordinates.h" -#include "Walker.h" -#include "ResourceHandle.h" -#include "SpeciesSet.h" -#include "Pools/PooledData.h" -#include "OhmmsPETE/OhmmsArray.h" -#include "Utilities/TimerManager.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "type_traits/template_types.hpp" -#include "SimulationCell.h" -#include "MCCoords.hpp" -#include "DTModes.h" +#include "Particle/ParticleSetT.h" +#include "Particle/SimulationCell.h" namespace qmcplusplus { -///forward declaration of DistanceTable -class DistanceTable; -class DistanceTableAA; -class DistanceTableAB; -class ResourceCollection; -class StructFact; -struct SKMultiWalkerMem; -/** Specialized paritlce class for atomistic simulations - * - * Derived from QMCTraits, ParticleBase and OhmmsElementBase. - * The ParticleLayout class represents a supercell with/without periodic boundary - * conditions. The ParticleLayout class also takes care of spatial decompositions - * for efficient evaluations for the interactions with a finite cutoff. 
- */ -class ParticleSet : public QMCTraits, public OhmmsElementBase, public PtclOnLatticeTraits -{ -public: - /// walker type - using Walker_t = Walker; - /// container type to store the property - using PropertyContainer_t = Walker_t::PropertyContainer_t; - /// buffer type for a serialized buffer - using Buffer_t = PooledData; - - enum quantum_domains - { - no_quantum_domain = 0, - classical, - quantum - }; - - ///quantum_domain of the particles, default = classical - quantum_domains quantum_domain; - - //@{ public data members - ///Species ID - ParticleIndex GroupID; - ///Position - ParticlePos R; - ///internal spin variables for dynamical spin calculations - ParticleScalar spins; - ///gradients of the particles - ParticleGradient G; - ///laplacians of the particles - ParticleLaplacian L; - ///mass of each particle - ParticleScalar Mass; - ///charge of each particle - ParticleScalar Z; - - ///the index of the active bead for particle-by-particle moves - Index_t activeBead; - ///the direction reptile traveling - Index_t direction; - - ///Particle density in G-space for MPC interaction - std::vector> DensityReducedGvecs; - std::vector Density_G; - Array Density_r; - - /// DFT potential - std::vector> VHXCReducedGvecs; - std::vector VHXC_G[2]; - Array VHXC_r[2]; - - /** name-value map of Walker Properties - * - * PropertyMap is used to keep the name-value mapping of - * Walker_t::Properties. PropertyList::Values are not - * necessarily updated during the simulations. - */ - PropertySetType PropertyList; - - /** properties of the current walker - * - * The internal order is identical to PropertyList, which holds - * the matching names. - */ - PropertyContainer_t Properties; - - /** observables in addition to those registered in Properties/PropertyList - * - * Such observables as density, gofr, sk are not stored per walker but - * collected during QMC iterations. 
- */ - Buffer_t Collectables; - - ///Property history vector - std::vector> PropertyHistory; - std::vector PHindex; - ///@} - - ///current MC step - int current_step; - - ///default constructor - ParticleSet(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); - - ///copy constructor - ParticleSet(const ParticleSet& p); - - ///default destructor - ~ParticleSet() override; - - /** create grouped particles - * @param agroup number of particles per group - */ - void create(const std::vector& agroup); - - /** print particle coordinates to a std::ostream - * @param os output stream - * @param maxParticlesToPrint maximal number of particles to print. Pass 0 to print all. - */ - void print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; - - ///dummy. For satisfying OhmmsElementBase. - bool get(std::ostream& os) const override; - ///dummy. For satisfying OhmmsElementBase. - bool put(std::istream&) override; - ///dummy. For satisfying OhmmsElementBase. - void reset() override; - - ///initialize ParticleSet from xmlNode - bool put(xmlNodePtr cur) override; - - ///specify quantum_domain of particles - void setQuantumDomain(quantum_domains qdomain); - - void set_quantum() { quantum_domain = quantum; } - - inline bool is_classical() const { return quantum_domain == classical; } - - inline bool is_quantum() const { return quantum_domain == quantum; } - - ///check whether quantum domain is valid for particles - inline bool quantumDomainValid(quantum_domains qdomain) const { return qdomain != no_quantum_domain; } - - ///check whether quantum domain is valid for particles - inline bool quantumDomainValid() const { return quantumDomainValid(quantum_domain); } - - /** add a distance table - * @param psrc source particle set - * @param modes bitmask DistanceTable::DTModes - * - * if this->myName == psrc.getName(), AA type. Otherwise, AB type. 
- */ - int addTable(const ParticleSet& psrc, DTModes modes = DTModes::ALL_OFF); - - ///get a distance table by table_ID - inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; } - ///get a distance table by table_ID and dyanmic_cast to DistanceTableAA - const DistanceTableAA& getDistTableAA(int table_ID) const; - ///get a distance table by table_ID and dyanmic_cast to DistanceTableAB - const DistanceTableAB& getDistTableAB(int table_ID) const; - - /** reset all the collectable quantities during a MC iteration - */ - inline void resetCollectables() { std::fill(Collectables.begin(), Collectables.end(), 0.0); } - - /** update the internal data - *@param skip SK update if skipSK is true - */ - void update(bool skipSK = false); - - /// batched version of update - static void mw_update(const RefVectorWithLeader& p_list, bool skipSK = false); - - /** create Structure Factor with PBCs - */ - void createSK(); - - bool hasSK() const { return bool(structure_factor_); } - - /** return Structure Factor - */ - const StructFact& getSK() const - { - assert(structure_factor_); - return *structure_factor_; - }; - - /** Turn on per particle storage in Structure Factor - */ - void turnOnPerParticleSK(); - - /** Get state (on/off) of per particle storage in Structure Factor - */ - bool getPerParticleSKState() const; - - ///retrun the SpeciesSet of this particle set - inline SpeciesSet& getSpeciesSet() { return my_species_; } - ///retrun the const SpeciesSet of this particle set - inline const SpeciesSet& getSpeciesSet() const { return my_species_; } - - ///return parent's name - inline const std::string& parentName() const { return ParentName; } - inline void setName(const std::string& aname) - { - myName = aname; - if (ParentName == "0") - { - ParentName = aname; - } - } - - inline const DynamicCoordinates& getCoordinates() const { return *coordinates_; } - - void resetGroups(); - - const auto& getSimulationCell() const { return simulation_cell_; } - const 
auto& getLattice() const { return simulation_cell_.getLattice(); } - auto& getPrimitiveLattice() const { return const_cast(simulation_cell_.getPrimLattice()); } - const auto& getLRBox() const { return simulation_cell_.getLRBox(); } - - inline bool isSameMass() const { return same_mass_; } - inline bool isSpinor() const { return is_spinor_; } - inline void setSpinor(bool is_spinor) { is_spinor_ = is_spinor; } - - /// return active particle id - inline Index_t getActivePtcl() const { return active_ptcl_; } - inline const PosType& getActivePos() const { return active_pos_; } - inline Scalar_t getActiveSpinVal() const { return active_spin_val_; } - - /// return the active position if the particle is active or the return current position if not - inline const PosType& activeR(int iat) const - { - // When active_ptcl_ == iat, a move has been proposed. - return (active_ptcl_ == iat) ? active_pos_ : R[iat]; - } - - /// return the active spin value if the particle is active or return the current spin value if not - inline const Scalar_t& activeSpin(int iat) const - { - // When active_ptcl_ == iat, a move has been proposed. - return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat]; - } - - /** move the iat-th particle to active_pos_ - * @param iat the index of the particle to be moved - * @param displ the displacement of the iat-th particle position - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. - * - * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTable::Temp. - * If maybe_accept = false, certain operations for accepting moves will be skipped for optimal performance. 
- */ - void makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true); - /// makeMove, but now includes an update to the spin variable - void makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); - - /// batched version of makeMove - template - static void mw_makeMove(const RefVectorWithLeader& p_list, int iat, const MCCoords& displs); - - static void mw_makeMove(const RefVectorWithLeader& p_list, - int iat, - const std::vector& displs); - - /// batched version makeMove for spin variable only - static void mw_makeSpinMove(const RefVectorWithLeader& p_list, - int iat, - const std::vector& sdispls); - - /** move the iat-th particle to active_pos_ - * @param iat the index of the particle to be moved - * @param displ random displacement of the iat-th particle - * @return true, if the move is valid - * - * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a proposed move. - * Evaluate the related distance table data DistanceTable::Temp. - * - * When a Lattice is defined, passing two checks makes a move valid. - * outOfBound(displ): invalid move, if displ is larger than half, currently, of the box in any direction - * isValid(Lattice.toUnit(active_pos_)): invalid move, if active_pos_ goes out of the Lattice in any direction marked with open BC. - * Note: active_pos_ and distances tables are always evaluated no matter the move is valid or not. - */ - bool makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ); - /// makeMoveAndCheck, but now includes an update to the spin variable - bool makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); - - /** Handles virtual moves for all the particles to a single newpos. - * - * The state active_ptcl_ remains -1 and rejectMove is not needed. - * acceptMove can not be used. 
- * See QMCHamiltonians::MomentumEstimator as an example - */ - void makeVirtualMoves(const SingleParticlePos& newpos); - - /** move all the particles of a walker - * @param awalker the walker to operate - * @param deltaR proposed displacement - * @param dt factor of deltaR - * @return true if all the moves are legal. - * - * If big displacements or illegal positions are detected, return false. - * If all good, R = awalker.R + dt* deltaR - */ - bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt); - - bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, const std::vector& dt); - - /** move all the particles including the drift - * - * Otherwise, everything is the same as makeMove for a walker - */ - bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - RealType dt); - - bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - const std::vector& dt); - - /** accept or reject a proposed move - * Two operation modes: - * The using and updating distance tables via `ParticleSet` operate in two modes, regular and forward modes. - * - * Regular mode - * The regular mode can only be used when the distance tables for particle pairs are fully up-to-date. - * This is the case after calling `ParticleSet::update()` in a unit test or after p-by-p moves in a QMC driver. - * In this mode, the distance tables remain up-to-date after calling `ParticleSet::acceptMove` - * and calling `ParticleSet::rejectMove` is not mandatory. - * - * Forward mode - * The forward mode assumes that distance table is not fully up-to-date until every particle is accepted - * or rejected to move once in order. This is the mode used in the p-by-p part of drivers. 
- * In this mode, calling `ParticleSet::accept_rejectMove` is required to handle accept/reject rather than - * calling individual `ParticleSet::acceptMove` and `ParticleSet::reject`. - * `ParticleSet::accept_rejectMove(iel)` ensures the distance tables (jel < iel) part is fully up-to-date - * regardless a move is accepted or rejected. For this reason, the rejecting operation inside - * `ParticleSet::accept_rejectMove` involves writing the distances with respect to the old particle position. - */ - void accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true); - - /** accept the move and update the particle attribute by the proposed move in regular mode - *@param iat the index of the particle whose position and other attributes to be updated - */ - void acceptMove(Index_t iat); - - /** reject a proposed move in regular mode - * @param iat the electron whose proposed move gets rejected. - */ - void rejectMove(Index_t iat); - - /// batched version of acceptMove and rejectMove fused, templated on CoordsType - template - static void mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode = true); - - /// batched version of acceptMove and rejectMove fused - static void mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode = true); - - /** batched version of acceptMove and reject Move fused, but only for spins - * - * note: should be called BEFORE mw_accept_rejectMove since the active_ptcl_ gets reset to -1 - * This would cause the assertion that we have the right particle index to fail if done in the - * wrong order - */ - static void mw_accept_rejectSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted); - - void initPropertyList(); - inline int addProperty(const std::string& pname) { return PropertyList.add(pname.c_str()); } - - int addPropertyHistory(int leng); - // void 
rejectedMove(); - // void resetPropertyHistory( ); - // void addPropertyHistoryPoint(int index, RealType data); - - void convert(const ParticlePos& pin, ParticlePos& pout); - void convert2Unit(const ParticlePos& pin, ParticlePos& pout); - void convert2Cart(const ParticlePos& pin, ParticlePos& pout); - void convert2Unit(ParticlePos& pout); - void convert2Cart(ParticlePos& pout); - void convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout); - void convert2CartInBox(const ParticlePos& pint, ParticlePos& pout); - - void applyBC(const ParticlePos& pin, ParticlePos& pout); - void applyBC(ParticlePos& pos); - void applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last); - void applyMinimumImage(ParticlePos& pinout) const; - - /** load a Walker_t to the current ParticleSet - * @param awalker the reference to the walker to be loaded - * @param pbyp true if it is used by PbyP update - * - * PbyP requires the distance tables and Sk with awalker.R - */ - void loadWalker(Walker_t& awalker, bool pbyp); - /** batched version of loadWalker */ - static void mw_loadWalker(const RefVectorWithLeader& p_list, - const RefVector& walkers, - const std::vector& recompute, - bool pbyp); - - /** save this to awalker - * - * just the R, G, and L - * More duplicate data that makes code difficult to reason about should be removed. - */ - void saveWalker(Walker_t& awalker); - - /** batched version of saveWalker - * - * just the R, G, and L - */ - static void mw_saveWalker(const RefVectorWithLeader& psets, const RefVector& walkers); - - /** update structure factor and unmark active_ptcl_ - *@param skip SK update if skipSK is true - * - * The Coulomb interaction evaluation needs the structure factor. - * For these reason, call donePbyP after the loop of single - * electron moves before evaluating the Hamiltonian. Unmark - * active_ptcl_ is more of a safety measure probably not needed. 
- */ - void donePbyP(bool skipSK = false); - /// batched version of donePbyP - static void mw_donePbyP(const RefVectorWithLeader& p_list, bool skipSK = false); - - ///return the address of the values of Hamiltonian terms - inline FullPrecRealType* restrict getPropertyBase() { return Properties.data(); } - - ///return the address of the values of Hamiltonian terms - inline const FullPrecRealType* restrict getPropertyBase() const { return Properties.data(); } - - ///return the address of the i-th properties - inline FullPrecRealType* restrict getPropertyBase(int i) { return Properties[i]; } - - ///return the address of the i-th properties - inline const FullPrecRealType* restrict getPropertyBase(int i) const { return Properties[i]; } - - inline void setTwist(const SingleParticlePos& t) { myTwist = t; } - inline const SingleParticlePos& getTwist() const { return myTwist; } - - /** Initialize particles around another ParticleSet - * Used to initialize an electron ParticleSet by an ion ParticleSet - */ - void randomizeFromSource(ParticleSet& src); - - /** get species name of particle i - */ - inline const std::string& species_from_index(int i) { return my_species_.speciesName[GroupID[i]]; } - - inline size_t getTotalNum() const { return TotalNum; } - - inline void clear() - { - TotalNum = 0; - - R.clear(); - spins.clear(); - GroupID.clear(); - G.clear(); - L.clear(); - Mass.clear(); - Z.clear(); - - coordinates_->resize(0); - } - - ///return the number of groups - inline int groups() const { return group_offsets_->size() - 1; } - - ///return the first index of a group i - inline int first(int igroup) const { return (*group_offsets_)[igroup]; } - - ///return the last index of a group i - inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } - - ///return the group id of a given particle in the particle set. 
- inline int getGroupID(int iat) const - { - assert(iat >= 0 && iat < TotalNum); - return GroupID[iat]; - } - - ///return the size of a group - inline int groupsize(int igroup) const { return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup]; } - - ///add attributes to list for IO - template - inline void createAttributeList(ATList& AttribList) - { - R.setTypeName(ParticleTags::postype_tag); - R.setObjName(ParticleTags::position_tag); - spins.setTypeName(ParticleTags::scalartype_tag); - spins.setObjName(ParticleTags::spins_tag); - GroupID.setTypeName(ParticleTags::indextype_tag); - GroupID.setObjName(ParticleTags::ionid_tag); - //add basic attributes - AttribList.add(R); - AttribList.add(spins); - AttribList.add(GroupID); - - G.setTypeName(ParticleTags::gradtype_tag); - L.setTypeName(ParticleTags::laptype_tag); - - G.setObjName("grad"); - L.setObjName("lap"); - - AttribList.add(G); - AttribList.add(L); - - //more particle attributes - Mass.setTypeName(ParticleTags::scalartype_tag); - Mass.setObjName("mass"); - AttribList.add(Mass); - - Z.setTypeName(ParticleTags::scalartype_tag); - Z.setObjName("charge"); - AttribList.add(Z); - } - - inline void setMapStorageToInput(const std::vector& mapping) { map_storage_to_input_ = mapping; } - inline const std::vector& get_map_storage_to_input() const { return map_storage_to_input_; } - - inline int getNumDistTables() const { return DistTables.size(); } - - inline auto& get_group_offsets() const { return *group_offsets_; } - - /// initialize a shared resource and hand it to a collection - void createResource(ResourceCollection& collection) const; - /** acquire external resource and assocaite it with the list of ParticleSet - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& p_list); - /** release external resource - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void 
releaseResource(ResourceCollection& collection, const RefVectorWithLeader& p_list); - - static RefVectorWithLeader extractDTRefList(const RefVectorWithLeader& p_list, int id); - static RefVectorWithLeader extractCoordsRefList(const RefVectorWithLeader& p_list); - static RefVectorWithLeader extractSKRefList(const RefVectorWithLeader& p_list); - -protected: - /// reference to global simulation cell - const SimulationCell& simulation_cell_; - - ///true if the particles have the same mass - bool same_mass_; - ///true is a dynamic spin calculation - bool is_spinor_; - /** the index of the active particle during particle-by-particle moves - * - * when a single particle move is proposed, the particle id is assigned to active_ptcl_ - * No matter the move is accepted or rejected, active_ptcl_ is marked back to -1. - * This state flag is used for picking coordinates and distances for SPO evaluation. - */ - Index_t active_ptcl_; - ///the proposed position of active_ptcl_ during particle-by-particle moves - SingleParticlePos active_pos_; - ///the proposed spin of active_ptcl_ during particle-by-particle moves - Scalar_t active_spin_val_; - - /** Map storage index to the input index. - * If not empty, particles were reordered by groups when being loaded from XML input. - * When other input data are affected by reordering, its builder should query this mapping. 
- * map_storage_to_input_[5] = 2 means the index 5(6th) particle in this ParticleSet was read from - * the index 2(3th) particle in the XML input - */ - std::vector map_storage_to_input_; - - ///SpeciesSet of particles - SpeciesSet my_species_; - - ///Structure factor - std::unique_ptr structure_factor_; - - ///multi walker structure factor data - ResourceHandle mw_structure_factor_data_handle_; - - /** map to handle distance tables - * - * myDistTableMap[source-particle-tag]= locator in the distance table - * myDistTableMap[ObjectTag] === 0 - */ - std::map myDistTableMap; - - /// distance tables that need to be updated by moving this ParticleSet - std::vector> DistTables; - - /// Descriptions from distance table creation. Same order as DistTables. - std::vector distTableDescriptions; - - TimerList_t myTimers; - - SingleParticlePos myTwist; - - std::string ParentName; - - ///total number of particles - size_t TotalNum; - - ///array to handle a group of distinct particles per species - std::shared_ptr>> group_offsets_; - - ///internal representation of R. It can be an SoA copy of R - std::unique_ptr coordinates_; - - /** compute temporal DistTables and SK for a new particle position - * - * @param iat the particle that is moved on a sphere - * @param newpos a new particle position - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. - */ - void computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true); - - - /** compute temporal DistTables and SK for a new particle position for each walker in a batch - * - * @param p_list the list of wrapped ParticleSet references in a walker batch - * @param iat the particle that is moved on a sphere - * @param new_positions new particle positions - * @param maybe_accept if false, the caller guarantees that the proposed move will not be accepted. 
- */ - static void mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& new_positions, - bool maybe_accept = true); - - /** actual implemenation for accepting a proposed move in forward mode - * - * @param iat the index of the particle whose position and other attributes to be updated - */ - void acceptMoveForwardMode(Index_t iat); - - /** reject a proposed move in forward mode - * @param iat the electron whose proposed move gets rejected. - */ - void rejectMoveForwardMode(Index_t iat); - - /// resize internal storage - inline void resize(size_t numPtcl) - { - TotalNum = numPtcl; - - R.resize(numPtcl); - spins.resize(numPtcl); - GroupID.resize(numPtcl); - G.resize(numPtcl); - L.resize(numPtcl); - Mass.resize(numPtcl); - Z.resize(numPtcl); - - coordinates_->resize(numPtcl); - } -}; +using ParticleSet = ParticleSetT; } // namespace qmcplusplus #endif diff --git a/src/Particle/ParticleSetPool.h b/src/Particle/ParticleSetPool.h index 63f252a997..08cc089678 100644 --- a/src/Particle/ParticleSetPool.h +++ b/src/Particle/ParticleSetPool.h @@ -18,107 +18,13 @@ #ifndef QMCPLUSPLUS_PARTICLESETPOOL_H #define QMCPLUSPLUS_PARTICLESETPOOL_H -#include "OhmmsData/OhmmsElementBase.h" -#include "Particle/MCWalkerConfiguration.h" -#include "Message/MPIObjectBase.h" -#include "SimulationCell.h" +#include "Configuration.h" +#include "Particle/ParticleSetPoolT.h" namespace qmcplusplus { -/** @ingroup qmcapp - * @brief Manage a collection of ParticleSet objects - * - * This object handles \ elements and - * functions as a builder class for ParticleSet objects. 
- */ -class ParticleSetPool : public MPIObjectBase -{ -public: - using PoolType = std::map>; - - /** constructor - * @param aname xml tag - */ - ParticleSetPool(Communicate* c, const char* aname = "particleset"); - ~ParticleSetPool(); - - ParticleSetPool(const ParticleSetPool&) = delete; - ParticleSetPool& operator=(const ParticleSetPool&) = delete; - ParticleSetPool(ParticleSetPool&& pset) noexcept; - ParticleSetPool& operator=(ParticleSetPool&&) = default; - - bool put(xmlNodePtr cur); - bool get(std::ostream& os) const; - void reset(); - - void output_particleset_info(Libxml2Document& doc, xmlNodePtr root); - - /** initialize the supercell shared by all the particle sets - * - * return value is never checked anywhere - * side effect simulation_cell_ UPtr is set - * to particle layout created on heap. - * This is later directly assigned to pset member variable Lattice. - */ - bool readSimulationCellXML(xmlNodePtr cur); - - ///return true, if the pool is empty - inline bool empty() const { return myPool.empty(); } - - /** add a ParticleSet* to the pool with its ownership transferred - * ParticleSet built outside the ParticleSetPool must be constructed with - * the simulation cell from this->simulation_cell_. - */ - void addParticleSet(std::unique_ptr&& p); - - /** get a named ParticleSet - * @param pname name of the ParticleSet - * @return a MCWalkerConfiguration object with pname - * - * When the named ParticleSet is not in this object, return 0. - */ - ParticleSet* getParticleSet(const std::string& pname); - - /** get a named MCWalkerConfiguration - * @param pname name of the MCWalkerConfiguration - * @return a MCWalkerConfiguration object with pname - * - * When the named MCWalkerConfiguration is not in this object, return 0. 
- */ - MCWalkerConfiguration* getWalkerSet(const std::string& pname); - - /** get the Pool object - */ - inline const PoolType& getPool() const { return myPool; } - - /// get simulation cell - const auto& getSimulationCell() const { return *simulation_cell_; } - - /// set simulation cell - void setSimulationCell(const SimulationCell& simulation_cell) { *simulation_cell_ = simulation_cell; } - /** randomize a particleset particleset/@random='yes' && particleset@random_source exists - */ - void randomize(); +using ParticleSetPool = ParticleSetPoolT; -private: - /** global simulation cell - * - * updated by - * - readSimulationCellXML() parsing element - * - setSimulationCell() - */ - std::unique_ptr simulation_cell_; - /** List of ParticleSet owned - * - * Each ParticleSet has to have a unique name which is used as a key for the map. - */ - PoolType myPool; - /** xml node for random initialization. - * - * randomize() process initializations just before starting qmc sections - */ - std::vector randomize_nodes; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/ParticleSetPool.cpp b/src/Particle/ParticleSetPoolT.cpp similarity index 74% rename from src/Particle/ParticleSetPool.cpp rename to src/Particle/ParticleSetPoolT.cpp index 7f4cb7f3a5..0b9e8ea142 100644 --- a/src/Particle/ParticleSetPool.cpp +++ b/src/Particle/ParticleSetPoolT.cpp @@ -12,41 +12,45 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - /**@file ParticleSetPool.cpp * @brief Implements ParticleSetPool operators. 
*/ -#include "ParticleSetPool.h" +#include "ParticleSetPoolT.h" + +#include "LongRange/LRCoulombSingleton.h" +#include "OhmmsData/AttributeSet.h" +#include "OhmmsData/Libxml2Doc.h" +#include "Particle/InitMolecularSystemT.h" #include "ParticleBase/RandomSeqGenerator.h" -#include "ParticleIO/XMLParticleIO.h" #include "ParticleIO/LatticeIO.h" +#include "ParticleIO/XMLParticleIO.h" #include "Utilities/ProgressReportEngine.h" -#include "OhmmsData/AttributeSet.h" -#include "OhmmsData/Libxml2Doc.h" -#include "Particle/InitMolecularSystem.h" -#include "LongRange/LRCoulombSingleton.h" #include #include namespace qmcplusplus { -ParticleSetPool::ParticleSetPool(Communicate* c, const char* aname) - : MPIObjectBase(c), simulation_cell_(std::make_unique()) +template +ParticleSetPoolT::ParticleSetPoolT(Communicate* c, const char* aname) + : MPIObjectBase(c), simulation_cell_(std::make_unique>()) { ClassName = "ParticleSetPool"; myName = aname; } -ParticleSetPool::ParticleSetPool(ParticleSetPool&& other) noexcept +template +ParticleSetPoolT::ParticleSetPoolT(ParticleSetPoolT&& other) noexcept : MPIObjectBase(other.myComm), simulation_cell_(std::move(other.simulation_cell_)), myPool(std::move(other.myPool)) { ClassName = other.ClassName; myName = other.myName; } -ParticleSetPool::~ParticleSetPool() = default; +template +ParticleSetPoolT::~ParticleSetPoolT() = default; -ParticleSet* ParticleSetPool::getParticleSet(const std::string& pname) +template +ParticleSetT* ParticleSetPoolT::getParticleSet(const std::string& pname) { if (auto pit = myPool.find(pname); pit == myPool.end()) return nullptr; @@ -54,9 +58,10 @@ ParticleSet* ParticleSetPool::getParticleSet(const std::string& pname) return pit->second.get(); } -MCWalkerConfiguration* ParticleSetPool::getWalkerSet(const std::string& pname) +template +MCWalkerConfigurationT* ParticleSetPoolT::getWalkerSet(const std::string& pname) { - auto mc = dynamic_cast(getParticleSet(pname)); + auto mc = dynamic_cast*>(getParticleSet(pname)); if 
(mc == nullptr) { throw std::runtime_error("ParticleSePool::getWalkerSet missing " + pname); @@ -64,7 +69,8 @@ MCWalkerConfiguration* ParticleSetPool::getWalkerSet(const std::string& pname) return mc; } -void ParticleSetPool::addParticleSet(std::unique_ptr&& p) +template +void ParticleSetPoolT::addParticleSet(std::unique_ptr>&& p) { const auto pit(myPool.find(p->getName())); if (pit == myPool.end()) @@ -72,7 +78,8 @@ void ParticleSetPool::addParticleSet(std::unique_ptr&& p) auto& pname = p->getName(); LOGMSG(" Adding " << pname << " ParticleSet to the pool") if (&p->getSimulationCell() != simulation_cell_.get()) - throw std::runtime_error("Bug detected! ParticleSetPool::addParticleSet requires p created with the simulation " + throw std::runtime_error("Bug detected! ParticleSetPool::addParticleSet requires p " + "created with the simulation " "cell from ParticleSetPool."); myPool.emplace(pname, std::move(p)); } @@ -80,14 +87,15 @@ void ParticleSetPool::addParticleSet(std::unique_ptr&& p) throw std::runtime_error(p->getName() + " exists. Cannot be added again."); } -bool ParticleSetPool::readSimulationCellXML(xmlNodePtr cur) +template +bool ParticleSetPoolT::readSimulationCellXML(xmlNodePtr cur) { ReportEngine PRE("ParticleSetPool", "putLattice"); bool lattice_defined = false; try { - LatticeParser a(simulation_cell_->lattice_); + LatticeParserT a(simulation_cell_->lattice_); lattice_defined = a.put(cur); } catch (const UniformCommunicateError& ue) @@ -114,7 +122,8 @@ bool ParticleSetPool::readSimulationCellXML(xmlNodePtr cur) * Creating MCWalkerConfiguration for all the ParticleSet * objects. 
*/ -bool ParticleSetPool::put(xmlNodePtr cur) +template +bool ParticleSetPoolT::put(xmlNodePtr cur) { ReportEngine PRE("ParticleSetPool", "put"); std::string id("e"); @@ -133,10 +142,10 @@ bool ParticleSetPool::put(xmlNodePtr cur) pAttrib.add(spinor, "spinor", {"no", "yes"}); pAttrib.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values); pAttrib.put(cur); - //backward compatibility + // backward compatibility if (id == "e" && role == "none") role = "MC"; - ParticleSet* pTemp = getParticleSet(id); + ParticleSetT* pTemp = getParticleSet(id); if (pTemp == 0) { const bool use_offload = CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET; @@ -148,15 +157,15 @@ bool ParticleSetPool::put(xmlNodePtr cur) // select OpenMP offload implementation in ParticleSet. if (use_offload) - pTemp = new MCWalkerConfiguration(*simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD); + pTemp = new MCWalkerConfigurationT(*simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD); else - pTemp = new MCWalkerConfiguration(*simulation_cell_, DynamicCoordinateKind::DC_POS); + pTemp = new MCWalkerConfigurationT(*simulation_cell_, DynamicCoordinateKind::DC_POS); myPool.emplace(id, pTemp); try { - XMLParticleParser pread(*pTemp); + XMLParticleParserT pread(*pTemp); pread.readXML(cur); } catch (const UniformCommunicateError& ue) @@ -164,7 +173,7 @@ bool ParticleSetPool::put(xmlNodePtr cur) myComm->barrier_and_abort(ue.what()); } - //if random_source is given, create a node + // if random_source is given, create a node if (randomR == "yes" && !randomsrc.empty()) { xmlNodePtr anode = xmlNewNode(NULL, (const xmlChar*)"init"); @@ -186,14 +195,15 @@ bool ParticleSetPool::put(xmlNodePtr cur) return true; } -void ParticleSetPool::randomize() +template +void ParticleSetPoolT::randomize() { app_log() << "ParticleSetPool::randomize " << randomize_nodes.size() << " ParticleSet" << (randomize_nodes.size() == 1 ? "" : "s") << "." 
<< std::endl; bool success = true; for (int i = 0; i < randomize_nodes.size(); ++i) { - InitMolecularSystem moinit(*this); + InitMolecularSystemT moinit(*this); success &= moinit.put(randomize_nodes[i]); xmlFreeNode(randomize_nodes[i]); } @@ -202,7 +212,8 @@ void ParticleSetPool::randomize() throw std::runtime_error("ParticleSePool::randomize failed to randomize some Particlesets!"); } -bool ParticleSetPool::get(std::ostream& os) const +template +bool ParticleSetPoolT::get(std::ostream& os) const { os << "ParticleSetPool has: " << std::endl << std::endl; os.setf(std::ios::scientific, std::ios::floatfield); @@ -215,10 +226,11 @@ bool ParticleSetPool::get(std::ostream& os) const return true; } -void ParticleSetPool::output_particleset_info(Libxml2Document& doc, xmlNodePtr root) +template +void ParticleSetPoolT::output_particleset_info(Libxml2Document& doc, xmlNodePtr root) { xmlNodePtr particles_info = doc.addChild(root, "particles"); - PoolType::const_iterator it(myPool.begin()), it_end(myPool.end()); + typename PoolType::const_iterator it(myPool.begin()), it_end(myPool.end()); while (it != it_end) { xmlNodePtr particle = doc.addChild(particles_info, "particle"); @@ -230,10 +242,25 @@ void ParticleSetPool::output_particleset_info(Libxml2Document& doc, xmlNodePtr r /** reset is used to initialize and evaluate the distance tables */ -void ParticleSetPool::reset() +template +void ParticleSetPoolT::reset() { for (const auto& [key, pset] : myPool) pset->update(); } +// explicit instantiations +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class ParticleSetPoolT; +#else +template class ParticleSetPoolT; +#endif +#else +#ifndef MIXED_PRECISION +template class ParticleSetPoolT>; +#else +template class ParticleSetPoolT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/Particle/ParticleSetPoolT.h b/src/Particle/ParticleSetPoolT.h new file mode 100644 index 0000000000..e2511b15f0 --- /dev/null +++ b/src/Particle/ParticleSetPoolT.h @@ -0,0 +1,124 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_PARTICLESETPOOLT_H +#define QMCPLUSPLUS_PARTICLESETPOOLT_H + +#include "Message/MPIObjectBase.h" +#include "OhmmsData/OhmmsElementBase.h" +#include "Particle/MCWalkerConfigurationT.h" +#include "ParticleSetT.h" +#include "SimulationCellT.h" + +namespace qmcplusplus +{ +/** @ingroup qmcapp + * @brief Manage a collection of ParticleSet objects + * + * This object handles \ elements and + * functions as a builder class for ParticleSet objects. 
+ */ +template +class ParticleSetPoolT : public MPIObjectBase +{ +public: + using PoolType = std::map>>; + + /** constructor + * @param aname xml tag + */ + ParticleSetPoolT(Communicate* c, const char* aname = "particleset"); + ~ParticleSetPoolT(); + + ParticleSetPoolT(const ParticleSetPoolT&) = delete; + ParticleSetPoolT& operator=(const ParticleSetPoolT&) = delete; + ParticleSetPoolT(ParticleSetPoolT&& pset) noexcept; + ParticleSetPoolT& operator=(ParticleSetPoolT&&) = default; + + bool put(xmlNodePtr cur); + bool get(std::ostream& os) const; + void reset(); + + void output_particleset_info(Libxml2Document& doc, xmlNodePtr root); + + /** initialize the supercell shared by all the particle sets + * + * return value is never checked anywhere + * side effect simulation_cell_ UPtr is set + * to particle layout created on heap. + * This is later directly assigned to pset member variable Lattice. + */ + bool readSimulationCellXML(xmlNodePtr cur); + + /// return true, if the pool is empty + inline bool empty() const { return myPool.empty(); } + + /** add a ParticleSet* to the pool with its ownership transferred + * ParticleSet built outside the ParticleSetPool must be constructed with + * the simulation cell from this->simulation_cell_. + */ + void addParticleSet(std::unique_ptr>&& p); + + /** get a named ParticleSet + * @param pname name of the ParticleSet + * @return a MCWalkerConfiguration object with pname + * + * When the named ParticleSet is not in this object, return 0. + */ + ParticleSetT* getParticleSet(const std::string& pname); + + /** get a named MCWalkerConfiguration + * @param pname name of the MCWalkerConfiguration + * @return a MCWalkerConfiguration object with pname + * + * When the named MCWalkerConfiguration is not in this object, return 0. 
+ */ + MCWalkerConfigurationT* getWalkerSet(const std::string& pname); + + /** get the Pool object + */ + inline const PoolType& getPool() const { return myPool; } + + /// get simulation cell + const auto& getSimulationCell() const { return *simulation_cell_; } + + /// set simulation cell + void setSimulationCell(const SimulationCellT& simulation_cell) { *simulation_cell_ = simulation_cell; } + + /** randomize a particleset particleset/@random='yes' && + * particleset@random_source exists + */ + void randomize(); + +private: + /** global simulation cell + * + * updated by + * - readSimulationCellXML() parsing element + * - setSimulationCell() + */ + std::unique_ptr> simulation_cell_; + /** List of ParticleSet owned + * + * Each ParticleSet has to have a unique name which is used as a key for the + * map. + */ + PoolType myPool; + /** xml node for random initialization. + * + * randomize() process initializations just before starting qmc sections + */ + std::vector randomize_nodes; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/ParticleSet.BC.cpp b/src/Particle/ParticleSetT.BC.cpp similarity index 98% rename from src/Particle/ParticleSet.BC.cpp rename to src/Particle/ParticleSetT.BC.cpp index d772a1bf61..3fb57f5f79 100644 --- a/src/Particle/ParticleSet.BC.cpp +++ b/src/Particle/ParticleSetT.BC.cpp @@ -15,10 +15,10 @@ /**@file ParticleSet.BC.cpp * @brief definition of functions controlling Boundary Conditions */ -#include "Particle/ParticleSet.h" +#include "Particle/ParticleSetT.h" #include "Particle/FastParticleOperators.h" #include "Concurrency/OpenMP.h" -#include "LongRange/StructFact.h" +#include "LongRange/StructFactT.h" namespace qmcplusplus { diff --git a/src/Particle/ParticleSet.cpp b/src/Particle/ParticleSetT.cpp similarity index 56% rename from src/Particle/ParticleSet.cpp rename to src/Particle/ParticleSetT.cpp index d297c7444f..0f51ace986 100644 --- a/src/Particle/ParticleSet.cpp +++ b/src/Particle/ParticleSetT.cpp @@ -16,18 +16,23 @@ 
// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "ParticleSetT.h" -#include -#include -#include "ParticleSet.h" +#include "Concurrency/OpenMP.h" +#include "Particle/DistanceTableT.h" #include "Particle/DynamicCoordinatesBuilder.h" -#include "Particle/DistanceTable.h" -#include "Particle/createDistanceTable.h" -#include "LongRange/StructFact.h" -#include "Utilities/IteratorUtility.h" -#include "Utilities/RandomGenerator.h" +#include "Particle/FastParticleOperators.h" +#include "Particle/LongRange/StructFactT.h" +#include "Particle/createDistanceTableT.h" #include "ParticleBase/RandomSeqGeneratorGlobal.h" #include "ResourceCollection.h" +#include "Utilities/IteratorUtility.h" +#include "Utilities/RandomGenerator.h" +#include "Particle/FastParticleOperators.h" +#include "Concurrency/OpenMP.h" + +#include +#include namespace qmcplusplus { @@ -55,7 +60,8 @@ static const TimerNameList_t generatePSetTimerNames(std::string& obj {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}}; } -ParticleSet::ParticleSet(const SimulationCell& simulation_cell, const DynamicCoordinateKind kind) +template +ParticleSetT::ParticleSetT(const SimulationCellT& simulation_cell, const DynamicCoordinateKind kind) : quantum_domain(classical), Properties(0, 0, 1, WP::MAXPROPERTIES), simulation_cell_(simulation_cell), @@ -68,12 +74,13 @@ ParticleSet::ParticleSet(const SimulationCell& simulation_cell, const DynamicCoo ParentName("0"), TotalNum(0), group_offsets_(std::make_shared>>()), - coordinates_(createDynamicCoordinates(kind)) + coordinates_(createDynamicCoordinatesT(kind)) { initPropertyList(); } -ParticleSet::ParticleSet(const ParticleSet& p) +template +ParticleSetT::ParticleSetT(const ParticleSetT& p) : Properties(p.Properties), simulation_cell_(p.simulation_cell_), same_mass_(true), @@ -96,33 +103,36 @@ ParticleSet::ParticleSet(const 
ParticleSet& p) GroupID = p.GroupID; is_spinor_ = p.is_spinor_; - //need explicit copy: + // need explicit copy: Mass = p.Mass; Z = p.Z; - //std::ostringstream o; - //o<setName(o.str()); - //app_log() << " Copying a particle set " << p.getName() << " to " << this->getName() << " groups=" << groups() << std::endl; + // std::ostringstream o; + // o<setName(o.str()); + // app_log() << " Copying a particle set " << p.getName() << " to " << + // this->getName() << " groups=" << groups() << std::endl; myName = p.getName(); PropertyList.Names = p.PropertyList.Names; PropertyList.Values = p.PropertyList.Values; PropertyHistory = p.PropertyHistory; Collectables = p.Collectables; - //construct the distance tables with the same order + // construct the distance tables with the same order for (int i = 0; i < p.DistTables.size(); ++i) addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes()); if (p.structure_factor_) - structure_factor_ = std::make_unique(*p.structure_factor_); + structure_factor_ = std::make_unique>(*p.structure_factor_); myTwist = p.myTwist; G = p.G; L = p.L; } -ParticleSet::~ParticleSet() = default; +template +ParticleSetT::~ParticleSetT() = default; -void ParticleSet::create(const std::vector& agroup) +template +void ParticleSetT::create(const std::vector& agroup) { auto& group_offsets(*group_offsets_); group_offsets.resize(agroup.size() + 1); @@ -139,15 +149,18 @@ void ParticleSet::create(const std::vector& agroup) GroupID[loc] = i; } -void ParticleSet::setQuantumDomain(quantum_domains qdomain) +template +void ParticleSetT::setQuantumDomain(quantum_domains qdomain) { if (quantumDomainValid(qdomain)) quantum_domain = qdomain; else - throw std::runtime_error("ParticleSet::setQuantumDomain\n input quantum domain is not valid for particles"); + throw std::runtime_error("ParticleSet::setQuantumDomain\n input " + "quantum domain is not valid for particles"); } -void ParticleSet::resetGroups() +template +void ParticleSetT::resetGroups() { const int 
nspecies = my_species_.getTotalNum(); // Usually an empty ParticleSet indicates an error in the input file, @@ -194,7 +207,8 @@ void ParticleSet::resetGroups() assert(GroupID[iat] < nspecies); } -void ParticleSet::randomizeFromSource(ParticleSet& src) +template +void ParticleSetT::randomizeFromSource(ParticleSetT& src) { SpeciesSet& srcSpSet(src.getSpeciesSet()); SpeciesSet& spSet(getSpeciesSet()); @@ -206,7 +220,7 @@ void ParticleSet::randomizeFromSource(ParticleSet& src) int Nptcl = getTotalNum(); int NumSpecies = spSet.TotalNum; int NumSrcSpecies = srcSpSet.TotalNum; - //Store information about charges and number of each species + // Store information about charges and number of each species std::vector Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec; Zat.resize(Nsrc); Zspec.resize(NumSrcSpecies); @@ -272,7 +286,8 @@ void ParticleSet::randomizeFromSource(ParticleSet& src) } } -void ParticleSet::print(std::ostream& os, const size_t maxParticlesToPrint) const +template +void ParticleSetT::print(std::ostream& os, const size_t maxParticlesToPrint) const { os << " ParticleSet '" << getName() << "' contains " << TotalNum << " particles : "; if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0) @@ -297,17 +312,37 @@ void ParticleSet::print(std::ostream& os, const size_t maxParticlesToPrint) cons os << std::endl; } -bool ParticleSet::get(std::ostream& is) const { return true; } -bool ParticleSet::put(std::istream& is) { return true; } -void ParticleSet::reset() { app_log() << "<<<< going to set properties >>>> " << std::endl; } +template +bool ParticleSetT::get(std::ostream& is) const +{ + return true; +} -///read the particleset -bool ParticleSet::put(xmlNodePtr cur) { return true; } +template +bool ParticleSetT::put(std::istream& is) +{ + return true; +} -int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes) +template +void ParticleSetT::reset() +{ + app_log() << "<<<< going to set properties >>>> " << std::endl; +} + +/// read the particleset 
+template +bool ParticleSetT::put(xmlNodePtr cur) +{ + return true; +} + +template +int ParticleSetT::addTable(const ParticleSetT& psrc, DTModes modes) { if (myName == "none" || psrc.getName() == "none") - throw std::runtime_error("ParticleSet::addTable needs proper names for both source and target particle sets."); + throw std::runtime_error("ParticleSet::addTable needs proper names for " + "both source and target particle sets."); int tid; std::map::iterator tit(myDistTableMap.find(psrc.getName())); @@ -316,9 +351,9 @@ int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes) std::ostringstream description; tid = DistTables.size(); if (myName == psrc.getName()) - DistTables.push_back(createDistanceTable(*this, description)); + DistTables.push_back(createDistanceTableT(*this, description)); else - DistTables.push_back(createDistanceTable(psrc, *this, description)); + DistTables.push_back(createDistanceTableT(psrc, *this, description)); distTableDescriptions.push_back(description.str()); myDistTableMap[psrc.getName()] = tid; app_debug() << " ... 
ParticleSet::addTable Create Table #" << tid << " " << DistTables[tid]->getName() @@ -336,17 +371,20 @@ int ParticleSet::addTable(const ParticleSet& psrc, DTModes modes) return tid; } -const DistanceTableAA& ParticleSet::getDistTableAA(int table_ID) const +template +const DistanceTableAAT& ParticleSetT::getDistTableAA(int table_ID) const { - return dynamic_cast(*DistTables[table_ID]); + return dynamic_cast&>(*DistTables[table_ID]); } -const DistanceTableAB& ParticleSet::getDistTableAB(int table_ID) const +template +const DistanceTableABT& ParticleSetT::getDistTableAB(int table_ID) const { - return dynamic_cast(*DistTables[table_ID]); + return dynamic_cast&>(*DistTables[table_ID]); } -void ParticleSet::update(bool skipSK) +template +void ParticleSetT::update(bool skipSK) { ScopedTimer update_scope(myTimers[PS_update]); @@ -359,12 +397,13 @@ void ParticleSet::update(bool skipSK) active_ptcl_ = -1; } -void ParticleSet::mw_update(const RefVectorWithLeader& p_list, bool skipSK) +template +void ParticleSetT::mw_update(const RefVectorWithLeader& p_list, bool skipSK) { auto& p_leader = p_list.getLeader(); ScopedTimer update_scope(p_leader.myTimers[PS_update]); - for (ParticleSet& pset : p_list) + for (ParticleSetT& pset : p_list) pset.coordinates_->setAllParticlePos(pset.R); auto& dts = p_leader.DistTables; @@ -379,7 +418,8 @@ void ParticleSet::mw_update(const RefVectorWithLeader& p_list, bool p_list[iw].structure_factor_->updateAllPart(p_list[iw]); } -void ParticleSet::makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept) +template +void ParticleSetT::makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept) { active_ptcl_ = iat; active_pos_ = R[iat] + displ; @@ -387,23 +427,28 @@ void ParticleSet::makeMove(Index_t iat, const SingleParticlePos& displ, bool may computeNewPosDistTables(iat, active_pos_, maybe_accept); } -void ParticleSet::makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) +template +void 
ParticleSetT::makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) { makeMove(iat, displ); active_spin_val_ += sdispl; } +template template -void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, Index_t iat, const MCCoords& displs) +void ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT& displs) { mw_makeMove(p_list, iat, displs.positions); if constexpr (CT == CoordsType::POS_SPIN) mw_makeSpinMove(p_list, iat, displs.spins); } -void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& displs) +template +void ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& displs) { std::vector new_positions; new_positions.reserve(displs.size()); @@ -418,15 +463,17 @@ void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, mw_computeNewPosDistTables(p_list, iat, new_positions); } -void ParticleSet::mw_makeSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& sdispls) +template +void ParticleSetT::mw_makeSpinMove(const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& sdispls) { for (int iw = 0; iw < p_list.size(); iw++) p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw]; } -bool ParticleSet::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ) +template +bool ParticleSetT::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ) { active_ptcl_ = iat; active_pos_ = R[iat] + displ; @@ -448,14 +495,16 @@ bool ParticleSet::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ) return is_valid; } -bool ParticleSet::makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) +template +bool ParticleSetT::makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl) { bool is_valid = makeMoveAndCheck(iat, displ); active_spin_val_ += sdispl; return is_valid; } 
-void ParticleSet::computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept) +template +void ParticleSetT::computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept) { ScopedTimer compute_newpos_scope(myTimers[PS_newpos]); @@ -463,12 +512,13 @@ void ParticleSet::computeNewPosDistTables(Index_t iat, const SingleParticlePos& DistTables[i]->move(*this, newpos, iat, maybe_accept); } -void ParticleSet::mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& new_positions, - bool maybe_accept) +template +void ParticleSetT::mw_computeNewPosDistTables(const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& new_positions, + bool maybe_accept) { - ParticleSet& p_leader = p_list.getLeader(); + ParticleSetT& p_leader = p_list.getLeader(); ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]); { @@ -486,13 +536,14 @@ void ParticleSet::mw_computeNewPosDistTables(const RefVectorWithLeadermw_move(dt_list, p_list, new_positions, iat, maybe_accept); } - // DistTables mw_move calls are asynchronous. Wait for them before return. + // DistTables mw_move calls are asynchronous. Wait for them before + // return. 
PRAGMA_OFFLOAD("omp taskwait") } } - -bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt) +template +bool ParticleSetT::makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt) { active_ptcl_ = -1; auto& Lattice = simulation_cell_.getLattice(); @@ -519,13 +570,14 @@ bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, const ParticlePo DistTables[i]->evaluate(*this); if (structure_factor_) structure_factor_->updateAllPart(*this); - //every move is valid + // every move is valid return true; } -bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, - const ParticlePos& deltaR, - const std::vector& dt) +template +bool ParticleSetT::makeMoveAllParticles(const Walker_t& awalker, + const ParticlePos& deltaR, + const std::vector& dt) { active_ptcl_ = -1; auto& Lattice = simulation_cell_.getLattice(); @@ -552,7 +604,7 @@ bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, DistTables[i]->evaluate(*this); if (structure_factor_) structure_factor_->updateAllPart(*this); - //every move is valid + // every move is valid return true; } @@ -561,12 +613,14 @@ bool ParticleSet::makeMoveAllParticles(const Walker_t& awalker, * @param drift drift vector * @param deltaR random displacement * @param dt timestep - * @return true, if all the particle moves are legal under the boundary conditions + * @return true, if all the particle moves are legal under the boundary + * conditions */ -bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - RealType dt) +template +bool ParticleSetT::makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, + const ParticlePos& deltaR, + RealType dt) { active_ptcl_ = -1; auto& Lattice = simulation_cell_.getLattice(); @@ -593,14 +647,15 @@ bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, DistTables[i]->evaluate(*this); if 
(structure_factor_) structure_factor_->updateAllPart(*this); - //every move is valid + // every move is valid return true; } -bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, - const ParticlePos& drift, - const ParticlePos& deltaR, - const std::vector& dt) +template +bool ParticleSetT::makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, + const ParticlePos& deltaR, + const std::vector& dt) { active_ptcl_ = -1; auto& Lattice = simulation_cell_.getLattice(); @@ -628,7 +683,7 @@ bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, DistTables[i]->evaluate(*this); if (structure_factor_) structure_factor_->updateAllPart(*this); - //every move is valid + // every move is valid return true; } @@ -637,14 +692,15 @@ bool ParticleSet::makeMoveAllParticlesWithDrift(const Walker_t& awalker, * When the active_ptcl_ is equal to iat, overwrite the position and update the * content of the distance tables. */ -void ParticleSet::acceptMove(Index_t iat) +template +void ParticleSetT::acceptMove(Index_t iat) { #ifndef NDEBUG if (iat != active_ptcl_) throw std::runtime_error("Bug detected by acceptMove! 
Request electron is not active!"); #endif ScopedTimer update_scope(myTimers[PS_accept]); - //Update position + distance-table + // Update position + distance-table coordinates_->setOneParticlePos(active_pos_, iat); for (int i = 0; i < DistTables.size(); i++) DistTables[i]->update(iat); @@ -654,11 +710,12 @@ void ParticleSet::acceptMove(Index_t iat) active_ptcl_ = -1; } -void ParticleSet::acceptMoveForwardMode(Index_t iat) +template +void ParticleSetT::acceptMoveForwardMode(Index_t iat) { assert(iat == active_ptcl_); ScopedTimer update_scope(myTimers[PS_accept]); - //Update position + distance-table + // Update position + distance-table coordinates_->setOneParticlePos(active_pos_, iat); for (int i = 0; i < DistTables.size(); i++) DistTables[i]->updatePartial(iat, true); @@ -668,7 +725,8 @@ void ParticleSet::acceptMoveForwardMode(Index_t iat) active_ptcl_ = -1; } -void ParticleSet::accept_rejectMove(Index_t iat, bool accepted, bool forward_mode) +template +void ParticleSetT::accept_rejectMove(Index_t iat, bool accepted, bool forward_mode) { if (forward_mode) if (accepted) @@ -681,7 +739,8 @@ void ParticleSet::accept_rejectMove(Index_t iat, bool accepted, bool forward_mod rejectMove(iat); } -void ParticleSet::rejectMove(Index_t iat) +template +void ParticleSetT::rejectMove(Index_t iat) { #ifndef NDEBUG if (iat != active_ptcl_) @@ -690,41 +749,43 @@ void ParticleSet::rejectMove(Index_t iat) active_ptcl_ = -1; } -void ParticleSet::rejectMoveForwardMode(Index_t iat) +template +void ParticleSetT::rejectMoveForwardMode(Index_t iat) { assert(iat == active_ptcl_); - //Update distance-table + // Update distance-table for (int i = 0; i < DistTables.size(); i++) DistTables[i]->updatePartial(iat, false); active_ptcl_ = -1; } +template template -void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode) +void ParticleSetT::mw_accept_rejectMoveT(const RefVectorWithLeader>& p_list, + Index_t iat, + 
const std::vector& isAccepted, + bool forward_mode) { if constexpr (CT == CoordsType::POS_SPIN) mw_accept_rejectSpinMove(p_list, iat, isAccepted); mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode); } - -void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode) +template +void ParticleSetT::mw_accept_rejectMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode) { if (forward_mode) { - ParticleSet& p_leader = p_list.getLeader(); + ParticleSetT& p_leader = p_list.getLeader(); ScopedTimer update_scope(p_leader.myTimers[PS_accept]); const auto coords_list(extractCoordsRefList(p_list)); std::vector new_positions; new_positions.reserve(p_list.size()); - for (const ParticleSet& pset : p_list) + for (const ParticleSetT& pset : p_list) new_positions.push_back(pset.active_pos_); p_leader.coordinates_->mw_acceptParticlePos(coords_list, iat, new_positions, isAccepted); @@ -754,9 +815,10 @@ void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p } } -void ParticleSet::mw_accept_rejectSpinMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted) +template +void ParticleSetT::mw_accept_rejectSpinMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& isAccepted) { for (int iw = 0; iw < p_list.size(); iw++) { @@ -766,7 +828,8 @@ void ParticleSet::mw_accept_rejectSpinMove(const RefVectorWithLeader +void ParticleSetT::donePbyP(bool skipSK) { ScopedTimer donePbyP_scope(myTimers[PS_donePbyP]); coordinates_->donePbyP(); @@ -777,12 +840,13 @@ void ParticleSet::donePbyP(bool skipSK) active_ptcl_ = -1; } -void ParticleSet::mw_donePbyP(const RefVectorWithLeader& p_list, bool skipSK) +template +void ParticleSetT::mw_donePbyP(const RefVectorWithLeader>& p_list, bool skipSK) { - ParticleSet& p_leader = p_list.getLeader(); + ParticleSetT& p_leader = p_list.getLeader(); ScopedTimer 
donePbyP_scope(p_leader.myTimers[PS_donePbyP]); - for (ParticleSet& pset : p_list) + for (ParticleSetT& pset : p_list) { pset.coordinates_->donePbyP(); pset.active_ptcl_ = -1; @@ -791,7 +855,7 @@ void ParticleSet::mw_donePbyP(const RefVectorWithLeader& p_list, bo if (!skipSK && p_leader.structure_factor_) { auto sk_list = extractSKRefList(p_list); - StructFact::mw_updateAllPart(sk_list, p_list, p_leader.mw_structure_factor_data_handle_); + StructFactT::mw_updateAllPart(sk_list, p_list, p_leader.mw_structure_factor_data_handle_); } auto& dts = p_leader.DistTables; @@ -802,7 +866,8 @@ void ParticleSet::mw_donePbyP(const RefVectorWithLeader& p_list, bo } } -void ParticleSet::makeVirtualMoves(const SingleParticlePos& newpos) +template +void ParticleSetT::makeVirtualMoves(const SingleParticlePos& newpos) { active_ptcl_ = -1; active_pos_ = newpos; @@ -810,7 +875,8 @@ void ParticleSet::makeVirtualMoves(const SingleParticlePos& newpos) DistTables[i]->move(*this, newpos, active_ptcl_, false); } -void ParticleSet::loadWalker(Walker_t& awalker, bool pbyp) +template +void ParticleSetT::loadWalker(Walker_t& awalker, bool pbyp) { ScopedTimer update_scope(myTimers[PS_loadWalker]); R = awalker.R; @@ -831,15 +897,16 @@ void ParticleSet::loadWalker(Walker_t& awalker, bool pbyp) active_ptcl_ = -1; } -void ParticleSet::mw_loadWalker(const RefVectorWithLeader& p_list, - const RefVector& walkers, - const std::vector& recompute, - bool pbyp) +template +void ParticleSetT::mw_loadWalker(const RefVectorWithLeader>& p_list, + const RefVector& walkers, + const std::vector& recompute, + bool pbyp) { auto& p_leader = p_list.getLeader(); ScopedTimer load_scope(p_leader.myTimers[PS_loadWalker]); - auto loadWalkerConfig = [](ParticleSet& pset, Walker_t& awalker) { + auto loadWalkerConfig = [](ParticleSetT& pset, Walker_t& awalker) { pset.R = awalker.R; pset.spins = awalker.spins; pset.coordinates_->setAllParticlePos(pset.R); @@ -859,7 +926,8 @@ void ParticleSet::mw_loadWalker(const 
RefVectorWithLeader& p_list, } } -void ParticleSet::saveWalker(Walker_t& awalker) +template +void ParticleSetT::saveWalker(Walker_t& awalker) { awalker.R = R; awalker.spins = spins; @@ -869,17 +937,18 @@ void ParticleSet::saveWalker(Walker_t& awalker) #endif } -void ParticleSet::mw_saveWalker(const RefVectorWithLeader& psets, const RefVector& walkers) +template +void ParticleSetT::mw_saveWalker(const RefVectorWithLeader>& psets, const RefVector& walkers) { for (int iw = 0; iw < psets.size(); ++iw) psets[iw].saveWalker(walkers[iw]); } - -void ParticleSet::initPropertyList() +template +void ParticleSetT::initPropertyList() { PropertyList.clear(); - //Need to add the default Properties according to the enumeration + // Need to add the default Properties according to the enumeration PropertyList.add("LogPsi"); PropertyList.add("SignPsi"); PropertyList.add("UmbrellaWeight"); @@ -890,16 +959,19 @@ void ParticleSet::initPropertyList() PropertyList.add("LocalEnergy"); PropertyList.add("LocalPotential"); - // There is no point in checking this, its quickly not consistent as other objects update property list. - // if (PropertyList.size() != WP::NUMPROPERTIES) + // There is no point in checking this, its quickly not consistent as other + // objects update property list. if (PropertyList.size() != + // WP::NUMPROPERTIES) // { - // app_error() << "The number of default properties for walkers is not consistent." << std::endl; - // app_error() << "NUMPROPERTIES " << WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() << std::endl; - // throw std::runtime_error("ParticleSet::initPropertyList"); + // app_error() << "The number of default properties for walkers is not + // consistent." 
<< std::endl; app_error() << "NUMPROPERTIES " << + // WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() << + // std::endl; throw std::runtime_error("ParticleSet::initPropertyList"); // } } -int ParticleSet::addPropertyHistory(int leng) +template +int ParticleSetT::addPropertyHistory(int leng) { int newL = PropertyHistory.size(); PropertyHistory.push_back(std::vector(leng, 0.0)); @@ -934,23 +1006,25 @@ int ParticleSet::addPropertyHistory(int leng) // if (lastIndex<0) lastIndex+=PropertyHistory[dindex].size(); // PropertyHistory[dindex][PHindex[dindex]]=PropertyHistory[dindex][lastIndex]; // PHindex[dindex]++; -// if (PHindex[dindex]==PropertyHistory[dindex].size()) PHindex[dindex]=0; +// if (PHindex[dindex]==PropertyHistory[dindex].size()) +// PHindex[dindex]=0; // // PropertyHistory[dindex].push_front(PropertyHistory[dindex].front()); // // PropertyHistory[dindex].pop_back(); // } // } - -void ParticleSet::createResource(ResourceCollection& collection) const +template +void ParticleSetT::createResource(ResourceCollection& collection) const { coordinates_->createResource(collection); for (int i = 0; i < DistTables.size(); i++) DistTables[i]->createResource(collection); if (structure_factor_) - collection.addResource(std::make_unique()); + collection.addResource(std::make_unique>()); } -void ParticleSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& p_list) +template +void ParticleSetT::acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& p_list) { auto& ps_leader = p_list.getLeader(); ps_leader.coordinates_->acquireResource(collection, extractCoordsRefList(p_list)); @@ -958,10 +1032,11 @@ void ParticleSet::acquireResource(ResourceCollection& collection, const RefVecto ps_leader.DistTables[i]->acquireResource(collection, extractDTRefList(p_list, i)); if (ps_leader.structure_factor_) - p_list.getLeader().mw_structure_factor_data_handle_ = collection.lendResource(); + 
p_list.getLeader().mw_structure_factor_data_handle_ = collection.lendResource>(); } -void ParticleSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& p_list) +template +void ParticleSetT::releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& p_list) { auto& ps_leader = p_list.getLeader(); ps_leader.coordinates_->releaseResource(collection, extractCoordsRefList(p_list)); @@ -972,47 +1047,308 @@ void ParticleSet::releaseResource(ResourceCollection& collection, const RefVecto collection.takebackResource(p_list.getLeader().mw_structure_factor_data_handle_); } -RefVectorWithLeader ParticleSet::extractDTRefList(const RefVectorWithLeader& p_list, int id) +template +RefVectorWithLeader> ParticleSetT::extractDTRefList( + const RefVectorWithLeader>& p_list, + int id) { - RefVectorWithLeader dt_list(*p_list.getLeader().DistTables[id]); + RefVectorWithLeader> dt_list(*p_list.getLeader().DistTables[id]); dt_list.reserve(p_list.size()); - for (ParticleSet& p : p_list) + for (ParticleSetT& p : p_list) dt_list.push_back(*p.DistTables[id]); return dt_list; } -RefVectorWithLeader ParticleSet::extractCoordsRefList( - const RefVectorWithLeader& p_list) +template +RefVectorWithLeader> ParticleSetT::extractCoordsRefList( + const RefVectorWithLeader>& p_list) { - RefVectorWithLeader coords_list(*p_list.getLeader().coordinates_); + RefVectorWithLeader> coords_list(*p_list.getLeader().coordinates_); coords_list.reserve(p_list.size()); - for (ParticleSet& p : p_list) + for (ParticleSetT& p : p_list) coords_list.push_back(*p.coordinates_); return coords_list; } -RefVectorWithLeader ParticleSet::extractSKRefList(const RefVectorWithLeader& p_list) +template +RefVectorWithLeader> ParticleSetT::extractSKRefList(const RefVectorWithLeader>& p_list) { - RefVectorWithLeader sk_list(*p_list.getLeader().structure_factor_); + RefVectorWithLeader> sk_list(*p_list.getLeader().structure_factor_); sk_list.reserve(p_list.size()); - for (ParticleSet& p : 
p_list) + for (ParticleSetT& p : p_list) sk_list.push_back(*p.structure_factor_); return sk_list; } -//explicit instantiations -template void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const MCCoords& displs); -template void ParticleSet::mw_makeMove(const RefVectorWithLeader& p_list, - Index_t iat, - const MCCoords& displs); -template void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, +/** Creating StructureFactor + * + * Currently testing only 1 component for PBCs. + */ +template +void ParticleSetT::createSK() +{ + if (structure_factor_) + throw std::runtime_error("Report bug! structure_factor_ has already " + "been created. Unexpected call sequence."); + + auto& Lattice = getLattice(); + auto& LRBox = getLRBox(); + if (Lattice.explicitly_defined) + convert2Cart(R); // make sure that R is in Cartesian coordinates + + if (Lattice.SuperCellEnum != SUPERCELL_OPEN) + { + app_log() << "\n Creating Structure Factor for periodic systems " << LRBox.LR_kc << std::endl; + structure_factor_ = std::make_unique>(LRBox, simulation_cell_.getKLists()); + } + + // set the mass array + int beforemass = my_species_.numAttributes(); + int massind = my_species_.addAttribute("mass"); + if (beforemass == massind) + { + app_log() << " ParticleSet::createSK setting mass of " << getName() << " to 1.0" << std::endl; + for (int ig = 0; ig < my_species_.getTotalNum(); ++ig) + my_species_(massind, ig) = 1.0; + } + for (int iat = 0; iat < GroupID.size(); iat++) + Mass[iat] = my_species_(massind, GroupID[iat]); + + coordinates_->setAllParticlePos(R); +} + +template +void ParticleSetT::turnOnPerParticleSK() +{ + if (structure_factor_) + structure_factor_->turnOnStorePerParticle(*this); + else + throw std::runtime_error("ParticleSet::turnOnPerParticleSK trying to turn on per particle " + "storage in " + "structure_factor_ but structure_factor_ has not been created."); +} + +template +bool ParticleSetT::getPerParticleSKState() const +{ + bool 
isPerParticleOn = false; + if (structure_factor_) + isPerParticleOn = structure_factor_->isStorePerParticle(); + return isPerParticleOn; +} + +template +void ParticleSetT::convert(const ParticlePos& pin, ParticlePos& pout) +{ + if (pin.getUnit() == pout.getUnit()) + { + pout = pin; + return; + } + if (pin.getUnit() == PosUnit::Lattice) + // convert to CartesianUnit + { + ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); + } + else + // convert to getLattice()Unit + { + ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); + } +} + +template +void ParticleSetT::convert2Unit(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Lattice); + if (pin.getUnit() == PosUnit::Lattice) + pout = pin; + else + ConvertPosUnit::apply(pin, getLattice().G, pout, 0, pin.size()); +} + +template +void ParticleSetT::convert2Cart(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Cartesian); + if (pin.getUnit() == PosUnit::Cartesian) + pout = pin; + else + ConvertPosUnit::apply(pin, getLattice().R, pout, 0, pin.size()); +} + +template +void ParticleSetT::convert2Unit(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Lattice) + return; + else + { + pinout.setUnit(PosUnit::Lattice); + ConvertPosUnit::apply(pinout, getLattice().G, 0, pinout.size()); + } +} + +template +void ParticleSetT::convert2Cart(ParticlePos& pinout) +{ + if (pinout.getUnit() == PosUnit::Cartesian) + return; + else + { + pinout.setUnit(PosUnit::Cartesian); + ConvertPosUnit::apply(pinout, getLattice().R, 0, pinout.size()); + } +} + +template +void ParticleSetT::applyBC(const ParticlePos& pin, ParticlePos& pout) +{ + applyBC(pin, pout, 0, pin.size()); +} + +template +void ParticleSetT::applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last) +{ + if (pin.getUnit() == PosUnit::Cartesian) + { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Cart2Cart(pin, getLattice().G, getLattice().R, pout, first, last); + else if 
(pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Cart2Unit(pin, getLattice().G, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else if (pin.getUnit() == PosUnit::Lattice) + { + if (pout.getUnit() == PosUnit::Cartesian) + ApplyBConds::Unit2Cart(pin, getLattice().R, pout, first, last); + else if (pout.getUnit() == PosUnit::Lattice) + ApplyBConds::Unit2Unit(pin, pout, first, last); + else + throw std::runtime_error("Unknown unit conversion"); + } + else + throw std::runtime_error("Unknown unit conversion"); +} + +template +void ParticleSetT::applyBC(ParticlePos& pos) +{ + if (pos.getUnit() == PosUnit::Lattice) + { + ApplyBConds::Unit2Unit(pos, 0, TotalNum); + } + else + { + ApplyBConds::Cart2Cart(pos, getLattice().G, getLattice().R, 0, TotalNum); + } +} + +template +void ParticleSetT::applyMinimumImage(ParticlePos& pinout) +{ + if (getLattice().SuperCellEnum == SUPERCELL_OPEN) + return; + for (int i = 0; i < pinout.size(); ++i) + getLattice().applyMinimumImage(pinout[i]); +} + +template +void ParticleSetT::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout) +{ + pout.setUnit(PosUnit::Lattice); + convert2Unit(pin, pout); // convert to crystalline unit + put2box(pout); +} + +template +void ParticleSetT::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout) +{ + convert2UnitInBox(pin, pout); // convert to crystalline unit + convert2Cart(pout); +} + +// explicit instantiations +//#ifndef QMC_COMPLEX +template class ParticleSetT; +template class ParticleSetT; +#ifdef QMC_COMPLEX +template class ParticleSetT>; +template class ParticleSetT>; +#endif + +template void ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, Index_t iat, - const std::vector& isAccepted, - bool forward_mode); -template void ParticleSet::mw_accept_rejectMove(const RefVectorWithLeader& p_list, - Index_t iat, - const std::vector& isAccepted, - bool forward_mode); + const MCCoordsT& displs); +template void ParticleSetT::mw_makeMove( + 
const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT& displs); +template void ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); +template void ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); + +template void ParticleSetT::mw_makeMove(const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT& displs); +template void ParticleSetT::mw_makeMove( + const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT& displs); +template void ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); +template void ParticleSetT::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); + +template void ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT, CoordsType::POS>& displs); +template void ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT, CoordsType::POS_SPIN>& displs); +template void ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); +template void ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); + +template void ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT, CoordsType::POS>& displs); +template void ParticleSetT>::mw_makeMove( + const RefVectorWithLeader& p_list, + Index_t iat, + const MCCoordsT, CoordsType::POS_SPIN>& displs); +template void ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool 
forward_mode); +template void ParticleSetT>::mw_accept_rejectMoveT( + const RefVectorWithLeader& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode); } // namespace qmcplusplus diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h new file mode 100644 index 0000000000..c037bcba5b --- /dev/null +++ b/src/Particle/ParticleSetT.h @@ -0,0 +1,763 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: D. Das, University of Illinois at Urbana-Champaign +// Bryan Clark, bclark@Princeton.edu, Princeton University +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_PARTICLESETT_H +#define QMCPLUSPLUS_PARTICLESETT_H + +#include "DTModes.h" +#include "DynamicCoordinatesT.h" +#include "MCCoordsT.hpp" +#include "OhmmsPETE/OhmmsArray.h" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/ParticleSetTraits.h" +#include "ParticleTags.h" +#include "Pools/PooledData.h" +#include "ResourceHandle.h" +#include "SimulationCellT.h" +#include "SpeciesSet.h" +#include "Utilities/TimerManager.h" +#include "Walker.h" +#include "type_traits/template_types.hpp" + +#include + +namespace qmcplusplus +{ +/// forward declarations +template +class DistanceTableT; +template +class DistanceTableAAT; +template +class DistanceTableABT; +class ResourceCollection; +template +class StructFactT; +template +struct SKMultiWalkerMemT; + +/** Specialized particle class for atomistic simulations + * + * Derived from QMCTraits, ParticleBase and + * OhmmsElementBase. The ParticleLayout class represents a supercell + * with/without periodic boundary conditions. The ParticleLayout class also + * takes care of spatial decompositions for efficient evaluations for the + * interactions with a finite cutoff. 
+ */ +template +class ParticleSetT : public OhmmsElementBase +{ +public: + using RealType = typename ParticleSetTraits::RealType; + using ValueType = typename ParticleSetTraits::ValueType; + using GradType = typename ParticleSetTraits::GradType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + using ComplexType = typename ParticleSetTraits::ComplexType; + using PosType = typename ParticleSetTraits::PosType; + using TensorType = typename ParticleSetTraits::TensorType; + + using PropertySetType = typename ParticleSetTraits::PropertySetType; + + using Index_t = typename LatticeParticleTraits::Index_t; + using Scalar_t = typename LatticeParticleTraits::Scalar_t; + using Tensor_t = typename LatticeParticleTraits::Tensor_t; + using ParticleLayout = typename LatticeParticleTraits::ParticleLayout; + using SingleParticlePos = typename LatticeParticleTraits::SingleParticlePos; + using ParticleIndex = typename LatticeParticleTraits::ParticleIndex; + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using ParticleScalar = typename LatticeParticleTraits::ParticleScalar; + using ParticleGradient = typename LatticeParticleTraits::ParticleGradient; + using ParticleLaplacian = typename LatticeParticleTraits::ParticleLaplacian; + using ParticleTensor = typename LatticeParticleTraits::ParticleTensor; + + /// walker type + using Walker_t = Walker, LatticeParticleTraits>; + /// container type to store the property + using PropertyContainer_t = typename Walker_t::PropertyContainer_t; + /// buffer type for a serialized buffer + using Buffer_t = PooledData; + + using SingleParticleValue = typename LatticeParticleTraits::SingleParticleValue; + + enum quantum_domains + { + no_quantum_domain = 0, + classical, + quantum + }; + + static constexpr auto DIM = ParticleSetTraits::DIM; + + /// quantum_domain of the particles, default = classical + quantum_domains quantum_domain; + + //@{ public data members + /// Species ID + ParticleIndex GroupID; + /// 
Position + ParticlePos R; + /// internal spin variables for dynamical spin calculations + ParticleScalar spins; + /// gradients of the particles + ParticleGradient G; + /// laplacians of the particles + ParticleLaplacian L; + /// mass of each particle + ParticleScalar Mass; + /// charge of each particle + ParticleScalar Z; + + /// the index of the active bead for particle-by-particle moves + Index_t activeBead; + /// the direction reptile traveling + Index_t direction; + + /// Particle density in G-space for MPC interaction + std::vector> DensityReducedGvecs; + std::vector Density_G; + Array Density_r; + + /// DFT potential + std::vector> VHXCReducedGvecs; + std::vector VHXC_G[2]; + Array VHXC_r[2]; + + /** name-value map of Walker Properties + * + * PropertyMap is used to keep the name-value mapping of + * Walker_t::Properties. PropertyList::Values are not + * necessarily updated during the simulations. + */ + PropertySetType PropertyList; + + /** properties of the current walker + * + * The internal order is identical to PropertyList, which holds + * the matching names. + */ + PropertyContainer_t Properties; + + /** observables in addition to those registered in Properties/PropertyList + * + * Such observables as density, gofr, sk are not stored per walker but + * collected during QMC iterations. 
+ */ + Buffer_t Collectables; + + /// Property history vector + std::vector> PropertyHistory; + std::vector PHindex; + ///@} + + /// current MC step + int current_step; + + /// default constructor + ParticleSetT(const SimulationCellT& simulation_cell, + const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS); + + /// copy constructor + ParticleSetT(const ParticleSetT& p); + + /// default destructor + ~ParticleSetT() override; + + /** create grouped particles + * @param agroup number of particles per group + */ + void create(const std::vector& agroup); + + /** print particle coordinates to a std::ostream + * @param os output stream + * @param maxParticlesToPrint maximal number of particles to print. Pass 0 + * to print all. + */ + void print(std::ostream& os, const size_t maxParticlesToPrint = 0) const; + + /// dummy. For satisfying OhmmsElementBase. + bool get(std::ostream& os) const override; + /// dummy. For satisfying OhmmsElementBase. + bool put(std::istream&) override; + /// dummy. For satisfying OhmmsElementBase. + void reset() override; + + /// initialize ParticleSet from xmlNode + bool put(xmlNodePtr cur) override; + + /// specify quantum_domain of particles + void setQuantumDomain(quantum_domains qdomain); + + void set_quantum() { quantum_domain = quantum; } + + inline bool is_classical() const { return quantum_domain == classical; } + + inline bool is_quantum() const { return quantum_domain == quantum; } + + /// check whether quantum domain is valid for particles + inline bool quantumDomainValid(quantum_domains qdomain) const { return qdomain != no_quantum_domain; } + + /// check whether quantum domain is valid for particles + inline bool quantumDomainValid() const { return quantumDomainValid(quantum_domain); } + + /** add a distance table + * @param psrc source particle set + * @param modes bitmask DistanceTable::DTModes + * + * if this->myName == psrc.getName(), AA type. Otherwise, AB type. 
+ */ + int addTable(const ParticleSetT& psrc, DTModes modes = DTModes::ALL_OFF); + + /// get a distance table by table_ID + inline auto& getDistTable(int table_ID) const { return *DistTables[table_ID]; } + /// get a distance table by table_ID and dynamic_cast to DistanceTableAA + const DistanceTableAAT& getDistTableAA(int table_ID) const; + /// get a distance table by table_ID and dynamic_cast to DistanceTableAB + const DistanceTableABT& getDistTableAB(int table_ID) const; + + /** reset all the collectable quantities during a MC iteration + */ + inline void resetCollectables() { std::fill(Collectables.begin(), Collectables.end(), 0.0); } + + /** update the internal data + *@param skip SK update if skipSK is true + */ + void update(bool skipSK = false); + + /// batched version of update + static void mw_update(const RefVectorWithLeader>& p_list, bool skipSK = false); + + /** create Structure Factor with PBCs + */ + void createSK(); + + bool hasSK() const { return bool(structure_factor_); } + + /** return Structure Factor + */ + const StructFactT& getSK() const + { + assert(structure_factor_); + return *structure_factor_; + }; + + /** Turn on per particle storage in Structure Factor + */ + void turnOnPerParticleSK(); + + /** Get state (on/off) of per particle storage in Structure Factor + */ + bool getPerParticleSKState() const; + + /// return the SpeciesSet of this particle set + inline SpeciesSet& getSpeciesSet() { return my_species_; } + /// return the const SpeciesSet of this particle set + inline const SpeciesSet& getSpeciesSet() const { return my_species_; } + + /// return parent's name + inline const std::string& parentName() const { return ParentName; } + inline void setName(const std::string& aname) + { + myName = aname; + if (ParentName == "0") + { + ParentName = aname; + } + } + + inline const DynamicCoordinatesT& getCoordinates() const { return *coordinates_; } + + void resetGroups(); + + const auto& getSimulationCell() const { return simulation_cell_; } 
+ const auto& getLattice() const { return simulation_cell_.getLattice(); } + auto& getPrimitiveLattice() const { return const_cast(simulation_cell_.getPrimLattice()); } + const auto& getLRBox() const { return simulation_cell_.getLRBox(); } + + inline bool isSameMass() const { return same_mass_; } + inline bool isSpinor() const { return is_spinor_; } + inline void setSpinor(bool is_spinor) { is_spinor_ = is_spinor; } + + /// return active particle id + inline Index_t getActivePtcl() const { return active_ptcl_; } + inline const PosType& getActivePos() const { return active_pos_; } + inline Scalar_t getActiveSpinVal() const { return active_spin_val_; } + + /// return the active position if the particle is active or the return + /// current position if not + inline const PosType& activeR(int iat) const + { + // When active_ptcl_ == iat, a move has been proposed. + return (active_ptcl_ == iat) ? active_pos_ : R[iat]; + } + + /// return the active spin value if the particle is active or return the + /// current spin value if not + inline const Scalar_t& activeSpin(int iat) const + { + // When active_ptcl_ == iat, a move has been proposed. + return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat]; + } + + /** move the iat-th particle to active_pos_ + * @param iat the index of the particle to be moved + * @param displ the displacement of the iat-th particle position + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. + * + * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a + * proposed move. Evaluate the related distance table data + * DistanceTable::Temp. If maybe_accept = false, certain operations for + * accepting moves will be skipped for optimal performance. 
+ */ + void makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true); + /// makeMove, but now includes an update to the spin variable + void makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); + + /// batched version of makeMove + template + static void mw_makeMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const MCCoordsT& displs); + + static void mw_makeMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& displs); + + /// batched version makeMove for spin variable only + static void mw_makeSpinMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& sdispls); + + /** move the iat-th particle to active_pos_ + * @param iat the index of the particle to be moved + * @param displ random displacement of the iat-th particle + * @return true, if the move is valid + * + * Update active_ptcl_ index and active_pos_ position (R[iat]+displ) for a + * proposed move. Evaluate the related distance table data + * DistanceTable::Temp. + * + * When a Lattice is defined, passing two checks makes a move valid. + * outOfBound(displ): invalid move, if displ is larger than half, currently, + * of the box in any direction isValid(Lattice.toUnit(active_pos_)): invalid + * move, if active_pos_ goes out of the Lattice in any direction marked with + * open BC. Note: active_pos_ and distances tables are always evaluated no + * matter the move is valid or not. + */ + bool makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ); + /// makeMoveAndCheck, but now includes an update to the spin variable + bool makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl); + + /** Handles virtual moves for all the particles to a single newpos. + * + * The state active_ptcl_ remains -1 and rejectMove is not needed. + * acceptMove can not be used. 
+ * See QMCHamiltonians::MomentumEstimator as an example + */ + void makeVirtualMoves(const SingleParticlePos& newpos); + + /** move all the particles of a walker + * @param awalker the walker to operate + * @param deltaR proposed displacement + * @param dt factor of deltaR + * @return true if all the moves are legal. + * + * If big displacements or illegal positions are detected, return false. + * If all good, R = awalker.R + dt* deltaR + */ + bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt); + + bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, const std::vector& dt); + + /** move all the particles including the drift + * + * Otherwise, everything is the same as makeMove for a walker + */ + bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, + const ParticlePos& deltaR, + RealType dt); + + bool makeMoveAllParticlesWithDrift(const Walker_t& awalker, + const ParticlePos& drift, + const ParticlePos& deltaR, + const std::vector& dt); + + /** accept or reject a proposed move + * Two operation modes: + * The using and updating distance tables via `ParticleSet` operate in two + * modes, regular and forward modes. + * + * Regular mode + * The regular mode can only be used when the distance tables for particle + * pairs are fully up-to-date. This is the case after calling + * `ParticleSet::update()` in a unit test or after p-by-p moves in a QMC + * driver. In this mode, the distance tables remain up-to-date after calling + * `ParticleSet::acceptMove` and calling `ParticleSet::rejectMove` is not + * mandatory. + * + * Forward mode + * The forward mode assumes that distance table is not fully up-to-date + * until every particle is accepted or rejected to move once in order. This + * is the mode used in the p-by-p part of drivers. 
In this mode, calling + * `ParticleSet::accept_rejectMove` is required to handle accept/reject + * rather than calling individual `ParticleSet::acceptMove` and + * `ParticleSet::reject`. `ParticleSet::accept_rejectMove(iel)` ensures the + * distance tables (jel < iel) part is fully up-to-date regardless a move is + * accepted or rejected. For this reason, the rejecting operation inside + * `ParticleSet::accept_rejectMove` involves writing the distances with + * respect to the old particle position. + */ + void accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true); + + /** accept the move and update the particle attribute by the proposed move + *in regular mode + *@param iat the index of the particle whose position and other attributes + *to be updated + */ + void acceptMove(Index_t iat); + + /** reject a proposed move in regular mode + * @param iat the electron whose proposed move gets rejected. + */ + void rejectMove(Index_t iat); + + /// batched version of acceptMove and rejectMove fused, templated on + /// CoordsType + template + static void mw_accept_rejectMoveT(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode = true); + + /// batched version of acceptMove and rejectMove fused + static void mw_accept_rejectMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& isAccepted, + bool forward_mode = true); + + /** batched version of acceptMove and reject Move fused, but only for spins + * + * note: should be called BEFORE mw_accept_rejectMove since the active_ptcl_ + * gets reset to -1 This would cause the assertion that we have the right + * particle index to fail if done in the wrong order + */ + static void mw_accept_rejectSpinMove(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& isAccepted); + + void initPropertyList(); + inline int addProperty(const std::string& pname) { return PropertyList.add(pname.c_str()); } + + int addPropertyHistory(int 
leng); + // void rejectedMove(); + // void resetPropertyHistory( ); + // void addPropertyHistoryPoint(int index, RealType data); + + void convert(const ParticlePos& pin, ParticlePos& pout); + void convert2Unit(const ParticlePos& pin, ParticlePos& pout); + void convert2Cart(const ParticlePos& pin, ParticlePos& pout); + void convert2Unit(ParticlePos& pout); + void convert2Cart(ParticlePos& pout); + void convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout); + void convert2CartInBox(const ParticlePos& pint, ParticlePos& pout); + + void applyBC(const ParticlePos& pin, ParticlePos& pout); + void applyBC(ParticlePos& pos); + void applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last); + void applyMinimumImage(ParticlePos& pinout) const; + + /** load a Walker_t to the current ParticleSet + * @param awalker the reference to the walker to be loaded + * @param pbyp true if it is used by PbyP update + * + * PbyP requires the distance tables and Sk with awalker.R + */ + void loadWalker(Walker_t& awalker, bool pbyp); + /** batched version of loadWalker */ + static void mw_loadWalker(const RefVectorWithLeader>& p_list, + const RefVector& walkers, + const std::vector& recompute, + bool pbyp); + + /** save this to awalker + * + * just the R, G, and L + * More duplicate data that makes code difficult to reason about should be + * removed. + */ + void saveWalker(Walker_t& awalker); + + /** batched version of saveWalker + * + * just the R, G, and L + */ + static void mw_saveWalker(const RefVectorWithLeader>& psets, const RefVector& walkers); + + /** update structure factor and unmark active_ptcl_ + *@param skip SK update if skipSK is true + * + * The Coulomb interaction evaluation needs the structure factor. + * For these reason, call donePbyP after the loop of single + * electron moves before evaluating the Hamiltonian. Unmark + * active_ptcl_ is more of a safety measure probably not needed. 
+ */ + void donePbyP(bool skipSK = false); + /// batched version of donePbyP + static void mw_donePbyP(const RefVectorWithLeader>& p_list, bool skipSK = false); + + /// return the address of the values of Hamiltonian terms + inline FullPrecRealType* restrict getPropertyBase() { return Properties.data(); } + + /// return the address of the values of Hamiltonian terms + inline const FullPrecRealType* restrict getPropertyBase() const { return Properties.data(); } + + /// return the address of the i-th properties + inline FullPrecRealType* restrict getPropertyBase(int i) { return Properties[i]; } + + /// return the address of the i-th properties + inline const FullPrecRealType* restrict getPropertyBase(int i) const { return Properties[i]; } + + inline void setTwist(const SingleParticlePos& t) { myTwist = t; } + + inline const SingleParticlePos& getTwist() const { return myTwist; } + + /** Initialize particles around another ParticleSet + * Used to initialize an electron ParticleSet by an ion ParticleSet + */ + void randomizeFromSource(ParticleSetT& src); + + /** get species name of particle i + */ + inline const std::string& species_from_index(int i) { return my_species_.speciesName[GroupID[i]]; } + + inline size_t getTotalNum() const { return TotalNum; } + + inline void clear() + { + TotalNum = 0; + + R.clear(); + spins.clear(); + GroupID.clear(); + G.clear(); + L.clear(); + Mass.clear(); + Z.clear(); + + coordinates_->resize(0); + } + + /// return the number of groups + inline int groups() const { return group_offsets_->size() - 1; } + + /// return the first index of a group i + inline int first(int igroup) const { return (*group_offsets_)[igroup]; } + + /// return the last index of a group i + inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } + + /// return the group id of a given particle in the particle set. 
+ inline int getGroupID(int iat) const + { + assert(iat >= 0 && iat < TotalNum); + return GroupID[iat]; + } + + /// return the size of a group + inline int groupsize(int igroup) const { return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup]; } + + /// add attributes to list for IO + template + inline void createAttributeList(ATList& AttribList) + { + R.setTypeName(ParticleTags::postype_tag); + R.setObjName(ParticleTags::position_tag); + spins.setTypeName(ParticleTags::scalartype_tag); + spins.setObjName(ParticleTags::spins_tag); + GroupID.setTypeName(ParticleTags::indextype_tag); + GroupID.setObjName(ParticleTags::ionid_tag); + // add basic attributes + AttribList.add(R); + AttribList.add(spins); + AttribList.add(GroupID); + + G.setTypeName(ParticleTags::gradtype_tag); + L.setTypeName(ParticleTags::laptype_tag); + + G.setObjName("grad"); + L.setObjName("lap"); + + AttribList.add(G); + AttribList.add(L); + + // more particle attributes + Mass.setTypeName(ParticleTags::scalartype_tag); + Mass.setObjName("mass"); + AttribList.add(Mass); + + Z.setTypeName(ParticleTags::scalartype_tag); + Z.setObjName("charge"); + AttribList.add(Z); + } + + inline void setMapStorageToInput(const std::vector& mapping) { map_storage_to_input_ = mapping; } + inline const std::vector& get_map_storage_to_input() const { return map_storage_to_input_; } + + inline int getNumDistTables() const { return DistTables.size(); } + + inline auto& get_group_offsets() const { return *group_offsets_; } + + /// initialize a shared resource and hand it to a collection + void createResource(ResourceCollection& collection) const; + /** acquire external resource and assocaite it with the list of ParticleSet + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& p_list); + /** release external resource + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void 
releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& p_list); + + static RefVectorWithLeader> extractDTRefList(const RefVectorWithLeader>& p_list, + int id); + static RefVectorWithLeader> extractCoordsRefList( + const RefVectorWithLeader>& p_list); + static RefVectorWithLeader> extractSKRefList(const RefVectorWithLeader>& p_list); + +protected: + /// reference to global simulation cell + const SimulationCellT& simulation_cell_; + + /// true if the particles have the same mass + bool same_mass_; + /// true is a dynamic spin calculation + bool is_spinor_; + /** the index of the active particle during particle-by-particle moves + * + * when a single particle move is proposed, the particle id is assigned to + * active_ptcl_ No matter the move is accepted or rejected, active_ptcl_ is + * marked back to -1. This state flag is used for picking coordinates and + * distances for SPO evaluation. + */ + Index_t active_ptcl_; + /// the proposed position of active_ptcl_ during particle-by-particle moves + SingleParticlePos active_pos_; + /// the proposed spin of active_ptcl_ during particle-by-particle moves + Scalar_t active_spin_val_; + + /** Map storage index to the input index. + * If not empty, particles were reordered by groups when being loaded from + * XML input. When other input data are affected by reordering, its builder + * should query this mapping. 
map_storage_to_input_[5] = 2 means the index + * 5(6th) particle in this ParticleSet was read from the index 2(3th) + * particle in the XML input + */ + std::vector map_storage_to_input_; + + /// SpeciesSet of particles + SpeciesSet my_species_; + + /// Structure factor + std::unique_ptr> structure_factor_; + + /// multi walker structure factor data + ResourceHandle> mw_structure_factor_data_handle_; + + /** map to handle distance tables + * + * myDistTableMap[source-particle-tag]= locator in the distance table + * myDistTableMap[ObjectTag] === 0 + */ + std::map myDistTableMap; + + /// distance tables that need to be updated by moving this ParticleSet + std::vector>> DistTables; + + /// Descriptions from distance table creation. Same order as DistTables. + std::vector distTableDescriptions; + + TimerList_t myTimers; + + SingleParticlePos myTwist; + + std::string ParentName; + + /// total number of particles + size_t TotalNum; + + /// array to handle a group of distinct particles per species + std::shared_ptr>> group_offsets_; + + /// internal representation of R. It can be an SoA copy of R + std::unique_ptr> coordinates_; + + /** compute temporal DistTables and SK for a new particle position + * + * @param iat the particle that is moved on a sphere + * @param newpos a new particle position + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. + */ + void computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true); + + /** compute temporal DistTables and SK for a new particle position for each + * walker in a batch + * + * @param p_list the list of wrapped ParticleSet references in a walker + * batch + * @param iat the particle that is moved on a sphere + * @param new_positions new particle positions + * @param maybe_accept if false, the caller guarantees that the proposed + * move will not be accepted. 
+ */ + static void mw_computeNewPosDistTables(const RefVectorWithLeader>& p_list, + Index_t iat, + const std::vector& new_positions, + bool maybe_accept = true); + + /** actual implemenation for accepting a proposed move in forward mode + * + * @param iat the index of the particle whose position and other attributes + * to be updated + */ + void acceptMoveForwardMode(Index_t iat); + + /** reject a proposed move in forward mode + * @param iat the electron whose proposed move gets rejected. + */ + void rejectMoveForwardMode(Index_t iat); + + /// resize internal storage + inline void resize(size_t numPtcl) + { + TotalNum = numPtcl; + + R.resize(numPtcl); + spins.resize(numPtcl); + GroupID.resize(numPtcl); + G.resize(numPtcl); + L.resize(numPtcl); + Mass.resize(numPtcl); + Z.resize(numPtcl); + + coordinates_->resize(numPtcl); + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h new file mode 100644 index 0000000000..ec02fa4d58 --- /dev/null +++ b/src/Particle/ParticleSetTraits.h @@ -0,0 +1,95 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+//
+// File developed by: Philip Fackler, facklerpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Philip Fackler, facklerpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_PARTICLESETTRAITS_H
+#define QMCPLUSPLUS_PARTICLESETTRAITS_H
+
+#include
+
+#include "OhmmsData/RecordProperty.h"
+#include "OhmmsPETE/Tensor.h"
+#include "OhmmsPETE/TinyVector.h"
+#include "Particle/Lattice/CrystalLattice.h"
+#include "Particle/ParticleBase/ParticleAttrib.h"
+#include "type_traits/complex_help.hpp"
+
+namespace qmcplusplus
+{
+/** Compile-time bundle of scalar, vector and tensor type aliases selected by
+ * the value type T.
+ *
+ * ValueType is T itself; IndexType is int; FullPrecRealType is always double.
+ * NOTE(review): the template argument lists of the other aliases are not
+ * visible in this copy of the patch, so their exact element types and
+ * dimensions should be confirmed against the real header.
+ */
+template
+struct ParticleSetTraits
+{
+  /// Spatial dimension, fixed at compile time by the OHMMS_DIM macro.
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  /// Real scalar type obtained through RealAlias.
+  using RealType = RealAlias;
+  /// Complex scalar type built on std::complex.
+  using ComplexType = std::complex;
+  /// The value type this traits class was instantiated with.
+  using ValueType = T;
+  using IndexType = int;
+  /// Fixed-size vector types built on TinyVector.
+  using PosType = TinyVector;
+  using GradType = TinyVector;
+  // using HessType = Tensor;
+  using TensorType = Tensor;
+  // using GradHessType = TinyVector, DIM>;
+  // using IndexVector = Vector;
+  // using ValueVector = Vector;
+  // using ValueMatrix = Matrix;
+  // using GradVector = Vector;
+  // using GradMatrix = Matrix;
+  // using HessVector = Vector;
+  // using HessMatrix = Matrix;
+  // using GradHessVector = Vector;
+  // using GradHessMatrix = Matrix;
+  // using VGLVector = VectorSoaContainer;
+
+  /// Full-precision scalar types; the real one is double irrespective of T.
+  using FullPrecRealType = double;
+  using FullPrecComplexType = std::complex;
+  /// Resolves to FullPrecComplexType or FullPrecRealType via std::conditional_t
+  /// (presumably depending on whether T is complex -- confirm the condition).
+  using FullPrecValueType = std::conditional_t::value, FullPrecComplexType, FullPrecRealType>;
+
+  /// Named-property record type built on RecordNamedProperty.
+  using PropertySetType = RecordNamedProperty;
+};
+
+/** Lattice and per-particle attribute type aliases selected by the value type T.
+ *
+ * Scalar types are forwarded from ParticleSetTraits; the single-particle
+ * index, position and tensor types are taken from the CrystalLattice layout
+ * type (ParticleLayout); the Particle* aliases are ParticleAttrib containers.
+ */
+template
+struct LatticeParticleTraits
+{
+  /// Spatial dimension, fixed at compile time by the OHMMS_DIM macro.
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  using RealType = typename ParticleSetTraits::RealType;
+
+  /// Lattice type; the Single* and ParticleTensorType aliases below are its nested types.
+  using ParticleLayout = CrystalLattice;
+  using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex;
+  using SingleParticlePos = typename ParticleLayout::SingleParticlePos;
+  using ParticleTensorType = typename ParticleLayout::Tensor_t;
+
+  /// Full-precision scalar types forwarded from ParticleSetTraits.
+  using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType;
+  using FullPrecComplexType = typename ParticleSetTraits::FullPrecComplexType;
+  using FullPrecValueType = typename ParticleSetTraits::FullPrecValueType;
+
+  using FullPrecGradType = TinyVector;
+
+  /// Short scalar aliases: integer index, full-precision real and complex.
+  using Index_t = int;
+  using Scalar_t = FullPrecRealType;
+  using Complex_t = FullPrecComplexType;
+  using Tensor_t = Tensor;
+
+  /// Per-particle attribute containers (one entry per particle).
+  using ParticleIndex = ParticleAttrib;
+  using ParticleScalar = ParticleAttrib;
+  using ParticlePos = ParticleAttrib;
+  using ParticleTensor = ParticleAttrib;
+
+  using ParticleGradient = ParticleAttrib;
+  using ParticleLaplacian = ParticleAttrib;
+  using SingleParticleValue = FullPrecValueType;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/RealSpacePositions.h b/src/Particle/RealSpacePositions.h index b7fe5e44f5..e6ff6beaa7 100644 --- a/src/Particle/RealSpacePositions.h +++ b/src/Particle/RealSpacePositions.h @@ -15,51 +15,12 @@ #ifndef QMCPLUSPLUS_REALSPACE_POSITIONS_H #define QMCPLUSPLUS_REALSPACE_POSITIONS_H -#include "Particle/DynamicCoordinates.h" -#include "OhmmsSoA/VectorSoaContainer.h" +#include "Configuration.h" +#include "Particle/RealSpacePositionsT.h" namespace qmcplusplus { -/** Introduced to handle virtual moves and ratio computations, e.g. for non-local PP evaluations.
- */ -class RealSpacePositions : public DynamicCoordinates -{ -public: - using ParticlePos = PtclOnLatticeTraits::ParticlePos; - using RealType = QMCTraits::RealType; - using PosType = QMCTraits::PosType; - - RealSpacePositions() : DynamicCoordinates(DynamicCoordinateKind::DC_POS) {} - - std::unique_ptr makeClone() override { return std::make_unique(*this); } - - void resize(size_t n) override { RSoA.resize(n); } - size_t size() const override { return RSoA.size(); } - - void setAllParticlePos(const ParticlePos& R) override - { - resize(R.size()); - RSoA.copyIn(R); - } - void setOneParticlePos(const PosType& pos, size_t iat) override { RSoA(iat) = pos; } - - void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const override - { - assert(this == &coords_list.getLeader()); - for (size_t iw = 0; iw < isAccepted.size(); iw++) - if (isAccepted[iw]) - coords_list[iw].setOneParticlePos(new_positions[iw], iat); - } - - const PosVectorSoa& getAllParticlePos() const override { return RSoA; } - PosType getOneParticlePos(size_t iat) const override { return RSoA[iat]; } +using RealSpacePositions = RealSpacePositionsT; -private: - ///particle positions in SoA layout - PosVectorSoa RSoA; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/RealSpacePositionsOMPTarget.h b/src/Particle/RealSpacePositionsOMPTarget.h index 310c71714a..9a7a34cf41 100644 --- a/src/Particle/RealSpacePositionsOMPTarget.h +++ b/src/Particle/RealSpacePositionsOMPTarget.h @@ -15,237 +15,12 @@ #ifndef QMCPLUSPLUS_REALSPACE_POSITIONS_OMPTARGET_H #define QMCPLUSPLUS_REALSPACE_POSITIONS_OMPTARGET_H -#include "Particle/DynamicCoordinates.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "ParticleSet.h" -#include "ResourceCollection.h" +#include "Configuration.h" +#include "Particle/RealSpacePositionsTOMPTarget.h" namespace 
qmcplusplus { -/** Introduced to handle virtual moves and ratio computations, e.g. for non-local PP evaluations. - */ -class RealSpacePositionsOMPTarget : public DynamicCoordinates -{ -public: - RealSpacePositionsOMPTarget() - : DynamicCoordinates(DynamicCoordinateKind::DC_POS_OFFLOAD), is_host_position_changed_(false) - {} - RealSpacePositionsOMPTarget(const RealSpacePositionsOMPTarget& in) - : DynamicCoordinates(DynamicCoordinateKind::DC_POS_OFFLOAD), RSoA(in.RSoA) - { - RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); - updateH2D(); - } - - std::unique_ptr makeClone() override - { - return std::make_unique(*this); - } - - void resize(size_t n) override - { - if (RSoA.size() != n) - { - RSoA.resize(n); - RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); - } - } - - size_t size() const override { return RSoA_hostview.size(); } - - void setAllParticlePos(const ParticlePos& R) override - { - resize(R.size()); - RSoA_hostview.copyIn(R); - updateH2D(); - is_nw_new_pos_prepared = false; - } - - void setOneParticlePos(const PosType& pos, size_t iat) override - { - RSoA_hostview(iat) = pos; - is_host_position_changed_ = true; - /* This was too slow due to overhead. 
- RealType x = pos[0]; - RealType y = pos[1]; - RealType z = pos[2]; - RealType* data = RSoA.data(); - size_t offset = RSoA.capacity(); - - PRAGMA_OFFLOAD("omp target map(to : x, y, z, iat)") - { - data[iat] = x; - data[iat + offset] = y; - data[iat + offset * 2] = z; - } - */ - } - - void mw_copyActivePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions) const override - { - assert(this == &coords_list.getLeader()); - auto& coords_leader = coords_list.getCastedLeader(); - - const auto nw = coords_list.size(); - auto& mw_new_pos = coords_leader.mw_mem_handle_.getResource().mw_new_pos; - mw_new_pos.resize(nw); - - for (int iw = 0; iw < nw; iw++) - mw_new_pos(iw) = new_positions[iw]; - - auto* mw_pos_ptr = mw_new_pos.data(); - PRAGMA_OFFLOAD("omp target update to(mw_pos_ptr[:QMCTraits::DIM * mw_new_pos.capacity()])") - - coords_leader.is_nw_new_pos_prepared = true; - } - - void mw_acceptParticlePos(const RefVectorWithLeader& coords_list, - size_t iat, - const std::vector& new_positions, - const std::vector& isAccepted) const override - { - assert(this == &coords_list.getLeader()); - const size_t nw = coords_list.size(); - auto& coords_leader = coords_list.getCastedLeader(); - MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_; - auto& mw_new_pos = mw_mem.mw_new_pos; - auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs; - auto& mw_accept_indices = mw_mem.mw_accept_indices; - - if (!is_nw_new_pos_prepared) - { - mw_copyActivePos(coords_list, iat, new_positions); - app_warning() << "This message only appear in unit tests. Report a bug if seen in production code." 
<< std::endl; - } - - coords_leader.is_nw_new_pos_prepared = false; - - mw_accept_indices.resize(nw); - auto* restrict id_array = mw_accept_indices.data(); - - size_t num_accepted = 0; - for (int iw = 0; iw < nw; iw++) - if (isAccepted[iw]) - { - auto& coords = coords_list.getCastedElement(iw); - id_array[num_accepted] = iw; - // save new coordinates on host copy - coords.RSoA_hostview(iat) = mw_new_pos[iw]; - num_accepted++; - } - - // early return to avoid OpenMP runtime mishandling of size 0 in transfer/compute. - if (num_accepted == 0) - return; - - //offload to GPU - auto* restrict mw_pos_ptr = mw_new_pos.data(); - auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data(); - const size_t rsoa_stride = RSoA.capacity(); - const size_t mw_pos_stride = mw_new_pos.capacity(); - - PRAGMA_OFFLOAD("omp target teams distribute parallel for \ - map(always, to : id_array[:num_accepted])") - for (int i = 0; i < num_accepted; i++) - { - const int iw = id_array[i]; - RealType* RSoA_dev_ptr = mw_rosa_ptr[iw]; - for (int id = 0; id < QMCTraits::DIM; id++) - RSoA_dev_ptr[iat + rsoa_stride * id] = mw_pos_ptr[iw + mw_pos_stride * id]; - } - } - - const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } - PosType getOneParticlePos(size_t iat) const override { return RSoA_hostview[iat]; } - - void donePbyP() override - { - is_nw_new_pos_prepared = false; - if (is_host_position_changed_) - { - updateH2D(); - is_host_position_changed_ = false; - } - } - - const RealType* getDevicePtr() const { return RSoA.device_data(); } - - const auto& getFusedNewPosBuffer() const { return mw_mem_handle_.getResource().mw_new_pos; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const override - { - MultiWalkerMem& mw_mem = coords_list.getCastedLeader().mw_mem_handle_ = - 
collection.lendResource(); - - auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs); - const auto nw = coords_list.size(); - mw_rsoa_ptrs.resize(nw); - for (int iw = 0; iw < nw; iw++) - { - auto& coords = coords_list.getCastedElement(iw); - mw_rsoa_ptrs[iw] = coords.RSoA.device_data(); - } - mw_rsoa_ptrs.updateTo(); - } - - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& coords_list) const override - { - collection.takebackResource(coords_list.getCastedLeader().mw_mem_handle_); - } - - const auto& getMultiWalkerRSoADevicePtrs() const { return mw_mem_handle_.getResource().mw_rsoa_ptrs; } - -private: - ///particle positions in SoA layout - VectorSoaContainer>> RSoA; - - ///multi walker shared memory buffer - struct MultiWalkerMem : public Resource - { - ///one particle new/old positions in SoA layout - VectorSoaContainer>> mw_new_pos; - - /// accept list - Vector>> mw_accept_indices; - - /// RSoA device ptr list - Vector>> mw_rsoa_ptrs; - - MultiWalkerMem() : Resource("MultiWalkerMem") {} - - MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - ///host view of RSoA - PosVectorSoa RSoA_hostview; - - ///if true, host position has been changed while the device copy has not been updated. - bool is_host_position_changed_; - - ///if true, mw_new_pos has been updated with active positions. 
- bool is_nw_new_pos_prepared; +using RealSpacePositionsOMPTarget = RealSpacePositionsTOMPTarget; - void updateH2D() - { - RealType* data = RSoA.data(); - PRAGMA_OFFLOAD("omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])") - is_host_position_changed_ = false; - } -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/RealSpacePositionsT.h b/src/Particle/RealSpacePositionsT.h new file mode 100644 index 0000000000..29511d08d2 --- /dev/null +++ b/src/Particle/RealSpacePositionsT.h @@ -0,0 +1,67 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file RealSpacePostions.h + */ +#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_H +#define QMCPLUSPLUS_REALSPACE_POSITIONST_H + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/DynamicCoordinatesT.h" + +namespace qmcplusplus +{ +/** Introduced to handle virtual moves and ratio computations, e.g. for + * non-local PP evaluations. 
+ */
+template
+class RealSpacePositionsT : public DynamicCoordinatesT
+{
+public:
+  using ParticlePos = typename LatticeParticleTraits::ParticlePos;
+  using RealType = typename DynamicCoordinatesT::RealType;
+  using PosType = typename DynamicCoordinatesT::PosType;
+  using PosVectorSoa = typename DynamicCoordinatesT::PosVectorSoa;
+
+  /// Construct an empty coordinate set tagged as DynamicCoordinateKind::DC_POS.
+  RealSpacePositionsT() : DynamicCoordinatesT(DynamicCoordinateKind::DC_POS) {}
+
+  /// Return a heap-allocated copy of this object.
+  std::unique_ptr> makeClone() override { return std::make_unique(*this); }
+
+  /// Resize the position storage to hold n particles.
+  void resize(size_t n) override { RSoA.resize(n); }
+  /// Number of particles currently stored.
+  size_t size() const override { return RSoA.size(); }
+
+  /// Resize to R.size() and overwrite every stored position with the content of R.
+  void setAllParticlePos(const ParticlePos& R) override
+  {
+    resize(R.size());
+    RSoA.copyIn(R);
+  }
+  /// Overwrite the stored position of particle iat with pos.
+  void setOneParticlePos(const PosType& pos, size_t iat) override { RSoA(iat) = pos; }
+
+  /** Batched accept: store the proposed position of particle iat for every accepted walker.
+   * @param coords_list coordinate objects of all walkers; this object must be the leader
+   * @param iat index of the moved particle
+   * @param new_positions proposed position of particle iat, one entry per walker
+   * @param isAccepted per-walker flag; rejected walkers are left untouched
+   *
+   * The loop runs over isAccepted.size() walkers and indexes coords_list and
+   * new_positions with the same walker index.
+   */
+  void mw_acceptParticlePos(const RefVectorWithLeader>& coords_list,
+                            size_t iat,
+                            const std::vector& new_positions,
+                            const std::vector& isAccepted) const override
+  {
+    assert(this == &coords_list.getLeader());
+    for (size_t iw = 0; iw < isAccepted.size(); iw++)
+      if (isAccepted[iw])
+        coords_list[iw].setOneParticlePos(new_positions[iw], iat);
+  }
+
+  /// Read-only access to the whole SoA position container.
+  const PosVectorSoa& getAllParticlePos() const override { return RSoA; }
+  /// Position of particle iat, returned by value.
+  PosType getOneParticlePos(size_t iat) const override { return RSoA[iat]; }
+
+private:
+  /// particle positions in SoA layout
+  PosVectorSoa RSoA;
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/Particle/RealSpacePositionsTOMPTarget.h b/src/Particle/RealSpacePositionsTOMPTarget.h
new file mode 100644
index 0000000000..bfd154cf8a
--- /dev/null
+++ b/src/Particle/RealSpacePositionsTOMPTarget.h
@@ -0,0 +1,264 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file RealSpacePostionsOMPTarget.h + */ +#ifndef QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H +#define QMCPLUSPLUS_REALSPACE_POSITIONST_OMPTARGET_H + +#include "Configuration.h" +#include "OMPTarget/OMPallocator.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "Particle/DynamicCoordinatesT.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/** Introduced to handle virtual moves and ratio computations, e.g. for + * non-local PP evaluations. + */ +template +class RealSpacePositionsTOMPTarget : public DynamicCoordinatesT +{ +public: + using ParticlePos = typename LatticeParticleTraits::ParticlePos; + using RealType = typename DynamicCoordinatesT::RealType; + using PosType = typename DynamicCoordinatesT::PosType; + using PosVectorSoa = typename DynamicCoordinatesT::PosVectorSoa; + static constexpr auto DIM = ParticleSetTraits::DIM; + + RealSpacePositionsTOMPTarget() + : DynamicCoordinatesT(DynamicCoordinateKind::DC_POS_OFFLOAD), is_host_position_changed_(false) + {} + RealSpacePositionsTOMPTarget(const RealSpacePositionsTOMPTarget& in) + : DynamicCoordinatesT(DynamicCoordinateKind::DC_POS_OFFLOAD), RSoA(in.RSoA) + { + RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); + updateH2D(); + } + + std::unique_ptr> makeClone() override + { + return std::make_unique(*this); + } + + void resize(size_t n) override + { + if (RSoA.size() != n) + { + RSoA.resize(n); + RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data()); + } + } + + size_t size() const override { return RSoA_hostview.size(); } + + void setAllParticlePos(const ParticlePos& R) override + { + resize(R.size()); + RSoA_hostview.copyIn(R); + updateH2D(); + 
is_nw_new_pos_prepared = false; + } + + void setOneParticlePos(const PosType& pos, size_t iat) override + { + RSoA_hostview(iat) = pos; + is_host_position_changed_ = true; + /* This was too slow due to overhead. + RealType x = pos[0]; + RealType y = pos[1]; + RealType z = pos[2]; + RealType* data = RSoA.data(); + size_t offset = RSoA.capacity(); + + PRAGMA_OFFLOAD("omp target map(to : x, y, z, iat)") + { + data[iat] = x; + data[iat + offset] = y; + data[iat + offset * 2] = z; + } + */ + } + + void mw_copyActivePos(const RefVectorWithLeader>& coords_list, + size_t iat, + const std::vector& new_positions) const override + { + assert(this == &coords_list.getLeader()); + auto& coords_leader = coords_list.template getCastedLeader(); + + const auto nw = coords_list.size(); + auto& mw_new_pos = coords_leader.mw_mem_handle_.getResource().mw_new_pos; + mw_new_pos.resize(nw); + + for (int iw = 0; iw < nw; iw++) + mw_new_pos(iw) = new_positions[iw]; + + auto* mw_pos_ptr = mw_new_pos.data(); + PRAGMA_OFFLOAD("omp target update to(\ + mw_pos_ptr[DIM * mw_new_pos.capacity()])") + + coords_leader.is_nw_new_pos_prepared = true; + } + + void mw_acceptParticlePos(const RefVectorWithLeader>& coords_list, + size_t iat, + const std::vector& new_positions, + const std::vector& isAccepted) const override + { + assert(this == &coords_list.getLeader()); + const size_t nw = coords_list.size(); + auto& coords_leader = coords_list.template getCastedLeader(); + MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_; + auto& mw_new_pos = mw_mem.mw_new_pos; + auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs; + auto& mw_accept_indices = mw_mem.mw_accept_indices; + + if (!is_nw_new_pos_prepared) + { + mw_copyActivePos(coords_list, iat, new_positions); + app_warning() << "This message only appear in unit tests. Report a " + "bug if seen in production code." 
+ << std::endl; + } + + coords_leader.is_nw_new_pos_prepared = false; + + mw_accept_indices.resize(nw); + auto* restrict id_array = mw_accept_indices.data(); + + size_t num_accepted = 0; + for (int iw = 0; iw < nw; iw++) + if (isAccepted[iw]) + { + auto& coords = coords_list.template getCastedElement(iw); + id_array[num_accepted] = iw; + // save new coordinates on host copy + coords.RSoA_hostview(iat) = mw_new_pos[iw]; + num_accepted++; + } + + // early return to avoid OpenMP runtime mishandling of size 0 in + // transfer/compute. + if (num_accepted == 0) + return; + + // offload to GPU + auto* restrict mw_pos_ptr = mw_new_pos.data(); + auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data(); + const size_t rsoa_stride = RSoA.capacity(); + const size_t mw_pos_stride = mw_new_pos.capacity(); + + PRAGMA_OFFLOAD("omp target teams distribute parallel for \ + is_device_ptr(mw_pos_ptr, mw_rosa_ptr) \ + map(always, to : id_array[:num_accepted])") + for (int i = 0; i < num_accepted; i++) + { + const int iw = id_array[i]; + RealType* RSoA_dev_ptr = mw_rosa_ptr[iw]; + for (int id = 0; id < QMCTraits::DIM; id++) + RSoA_dev_ptr[iat + rsoa_stride * id] = mw_pos_ptr[iw + mw_pos_stride * id]; + } + } + + const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; } + PosType getOneParticlePos(size_t iat) const override { return RSoA_hostview[iat]; } + + void donePbyP() override + { + is_nw_new_pos_prepared = false; + if (is_host_position_changed_) + { + updateH2D(); + is_host_position_changed_ = false; + } + } + + const RealType* getDevicePtr() const { return RSoA.device_data(); } + + const auto& getFusedNewPosBuffer() const { return mw_mem_handle_.getResource().mw_new_pos; } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& coords_list) const override + { + MultiWalkerMem& mw_mem = 
coords_list.template getCastedLeader().mw_mem_handle_ = + collection.lendResource(); + + auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs); + const auto nw = coords_list.size(); + mw_rsoa_ptrs.resize(nw); + for (int iw = 0; iw < nw; iw++) + { + auto& coords = coords_list.template getCastedElement(iw); + mw_rsoa_ptrs[iw] = coords.RSoA.device_data(); + } + mw_rsoa_ptrs.updateTo(); + } + + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& coords_list) const override + { + collection.takebackResource(coords_list.template getCastedLeader().mw_mem_handle_); + } + + const auto& getMultiWalkerRSoADevicePtrs() const { return mw_mem_handle_.getResource().mw_rsoa_ptrs; } + +private: + /// particle positions in SoA layout + VectorSoaContainer>> RSoA; + + /// multi walker shared memory buffer + struct MultiWalkerMem : public Resource + { + /// one particle new/old positions in SoA layout + VectorSoaContainer>> mw_new_pos; + + /// accept list + Vector>> mw_accept_indices; + + /// RSoA device ptr list + Vector>> mw_rsoa_ptrs; + + MultiWalkerMem() : Resource("MultiWalkerMem") {} + + MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + }; + + ResourceHandle mw_mem_handle_; + + /// host view of RSoA + PosVectorSoa RSoA_hostview; + + /// if true, host position has been changed while the device copy has not + /// been updated. + bool is_host_position_changed_; + + /// if true, mw_new_pos has been updated with active positions. 
+ bool is_nw_new_pos_prepared; + + void updateH2D() + { + RealType* data = RSoA.data(); + PRAGMA_OFFLOAD("omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])") + is_host_position_changed_ = false; + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/Reptile.h b/src/Particle/Reptile.h index 1738edcf22..d32143a4ca 100644 --- a/src/Particle/Reptile.h +++ b/src/Particle/Reptile.h @@ -25,256 +25,12 @@ #ifndef QMCPLUSPLUS_REPTILE_H #define QMCPLUSPLUS_REPTILE_H -#include "QMCDrivers/DriftOperators.h" -#include "QMCDrivers/WalkerProperties.h" #include "Configuration.h" -#include "Walker.h" +#include "Particle/ReptileT.h" namespace qmcplusplus { -class MCWalkerConfiguration; - -class Reptile : public QMCTraits -{ -public: - using WP = WalkerProperties::Indexes; - using Walker_t = MCWalkerConfiguration::Walker_t; - //using Buffer_t = Walker_t::Buffer_t ; - // using Walker_t = MCWalkerConfiguration::Walker_t; - using WalkerIter_t = MCWalkerConfiguration::iterator; - using ReptileConfig_t = std::vector; - - std::vector Action; - std::vector TransProb; - - RealType forwardprob; - RealType backwardprob; - RealType forwardaction; - RealType backwardaction; - - RealType tau; - - MCWalkerConfiguration& w; - WalkerIter_t repstart, repend; - IndexType direction, headindex, nbeads; - Walker_t* prophead; - - inline Reptile(MCWalkerConfiguration& W, WalkerIter_t start, WalkerIter_t end) - : w(W), - repstart(start), - repend(end), - direction(1), - headindex(0), - prophead(0) //, r2prop(0.0), r2accept(0.0),tau(0.0) - { - Action.resize(3); - Action[0] = w.addProperty("ActionBackward"); - Action[1] = w.addProperty("ActionForward"); - Action[2] = w.addProperty("ActionLocal"); - TransProb.resize(2); - TransProb[0] = w.addProperty("TransProbBackward"); - TransProb[1] = w.addProperty("TransProbForward"); - - nbeads = repend - repstart; - } - - ~Reptile() {} - - inline IndexType size() { return nbeads; } - - inline Walker_t& operator[](IndexType i) { return 
getWalker(getBeadIndex(i)); } - - inline IndexType wrapIndex(IndexType repindex) { return (repindex % nbeads + nbeads) % nbeads; } - - inline Walker_t& getWalker(IndexType i) - { - WalkerIter_t bead = repstart + wrapIndex(i); - return **bead; - } - - inline IndexType getBeadIndex(IndexType i) { return wrapIndex(headindex + direction * i); } - inline Walker_t& getBead(IndexType i) { return getWalker(getBeadIndex(i)); } - inline Walker_t& getHead() { return getWalker(getBeadIndex(0)); } - inline Walker_t& getTail() { return getWalker(getBeadIndex(nbeads - 1)); } - inline Walker_t& getNext() { return getWalker(getBeadIndex(nbeads - 2)); } - inline Walker_t& getCenter() { return getWalker(getBeadIndex((nbeads - 1) / 2)); } - //inline void setProposedHead(){ - - inline void flip() - { - // direction*=-1; - // headindex = getBeadIndex(nbeads-1); - headindex = wrapIndex(headindex - direction); - direction *= -1; - } - - inline void setDirection(IndexType dir) { direction = dir; } - - inline void setBead(Walker_t& walker, IndexType i) - { - IndexType index = getBeadIndex(i); - Walker_t& newbead(getWalker(index)); - newbead = walker; //This should be a hard copy - } - - inline void setHead(Walker_t& overwrite) - { - //overwrite last element. - headindex = getBeadIndex(nbeads - 1); //sets to position of tail. - Walker_t& newhead(getBead(0)); - newhead = overwrite; - } - //This function does two things: 1.) Moves the reptile forward 1 step. 2.) Returns the new head. - inline Walker_t& getNewHead() - { - //overwrite last element. - headindex = getBeadIndex(nbeads - 1); //sets to position of tail. 
- return getWalker(headindex); - } - - void saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType actionindex = 2; - if (direction != 0) - actionindex = (1 - d * direction) / 2; - walker.Properties(nPsi, Action[actionindex]) = val; - } - - RealType getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType actionindex = 2; - if (d != 0) - actionindex = (1 - direction * d) / 2; - - return walker.Properties(nPsi, Action[actionindex]); - } - - RealType getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d, IndexType nPsi = 0) - { - RealType af = getDirectionalAction(old_walker, +1, nPsi); - RealType ab = getDirectionalAction(new_walker, -1, nPsi); - RealType a0 = getDirectionalAction(old_walker, 0, nPsi) + getDirectionalAction(new_walker, 0, nPsi); - return af + ab + a0; - } - - void saveTransProb(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - walker.Properties(nPsi, TransProb[transindex]) = val; - } - - void saveTransProb(ParticleSet& W, IndexType d, RealType val, IndexType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - W.Properties(nPsi, TransProb[transindex]) = val; - } - RealType getTransProb(Walker_t& walker, IndexType d, RealType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - return walker.Properties(nPsi, TransProb[transindex]); - } - RealType getTransProb(ParticleSet& W, IndexType d, RealType nPsi = 0) - { - //IndexType repdirection=circbuffer.get_direction(); - IndexType transindex = (1 - d * direction) / 2; - return W.Properties(nPsi, TransProb[transindex]); - } - - inline void printState() - { - 
app_log() << "********PRINT REPTILE STATE*********\n"; - app_log() << "Direction=" << direction << " Headindex=" << headindex << " tail=" << getBeadIndex(nbeads - 1) - << "\n next=" << getBeadIndex(nbeads - 2) << " nbeads=" << nbeads << std::endl; - app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]\tAction[2]\t\n"; - for (int i = 0; i < nbeads; i++) - { - app_log() << i << "\t" << getBeadIndex(i) << "\t" << getBead(i).Properties(WP::LOCALENERGY) << "\t" - << getBead(i).Properties(Action[0]) << "\t" << getBead(i).Properties(Action[1]) << "\t" - << getBead(i).Properties(Action[2]) << "\n"; - } - app_log() << "POSITIONS===============:\n"; - for (int i = 0; i < nbeads; i++) - { - // app_log()<length of reptile, then return the last bead. if t<0; return the first bead. - inline Walker_t::ParticlePos linearInterp(RealType t) - { - IndexType nbead = - IndexType(t / tau); //Calculate the lower bound on the timeslice. t is between binnum*Tau and (binnum+1)Tau - RealType beadfrac = t / tau - nbead; //the fractional coordinate between n and n+1 bead - if (nbead <= 0) - { - ParticleSet::ParticlePos result = getHead().R; - return result; - } - else if (nbead >= nbeads - 1) - { - ParticleSet::ParticlePos result = getTail().R; - return result; - } - - else - { - Walker_t::ParticlePos dR(getBead(nbead + 1).R), interpR(getBead(nbead).R); - dR = dR - getBead(nbead).R; - - interpR = getBead(nbead).R + beadfrac * dR; - return interpR; - } - } - inline ReptileConfig_t getReptileSlicePositions(RealType tau, RealType beta) - { - IndexType nbeads_new = IndexType(beta / tau); - ReptileConfig_t new_reptile_coords(0); - - for (IndexType i = 0; i < nbeads_new; i++) - new_reptile_coords.push_back(linearInterp(tau * i)); - - return new_reptile_coords; - } - - inline void setReptileSlicePositions(ReptileConfig_t& rept) - { - if (rept.size() == nbeads) - { - for (int i = 0; i < nbeads; i++) - getBead(i).R = rept[i]; - } - else - ; - } - - inline void 
setReptileSlicePositions(Walker_t::ParticlePos R) - { - for (int i = 0; i < nbeads; i++) - getBead(i).R = R; - } -}; - +using Reptile = ReptileT; } // namespace qmcplusplus #endif diff --git a/src/Particle/ReptileT.h b/src/Particle/ReptileT.h new file mode 100644 index 0000000000..1e1835ed53 --- /dev/null +++ b/src/Particle/ReptileT.h @@ -0,0 +1,276 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_REPTILET_H +#define QMCPLUSPLUS_REPTILET_H + +#include "Configuration.h" +#include "ParticleSetTraits.h" +#include "QMCDrivers/DriftOperators.h" +#include "QMCDrivers/WalkerProperties.h" +#include "Walker.h" + +namespace qmcplusplus +{ +template +class MCWalkerConfigurationT; + +template +class ReptileT +{ +public: + using WP = WalkerProperties::Indexes; + using Walker_t = typename MCWalkerConfigurationT::Walker_t; + // using Buffer_t = Walker_t::Buffer_t ; + // using Walker_t = MCWalkerConfiguration::Walker_t; + using WalkerIter_t = typename MCWalkerConfigurationT::iterator; + using ReptileConfig_t = std::vector; + using IndexType = typename ParticleSetTraits::IndexType; + using RealType = typename ParticleSetTraits::RealType; + + std::vector Action; + std::vector TransProb; + + RealType forwardprob; + RealType backwardprob; + RealType forwardaction; + RealType 
backwardaction; + + RealType tau; + + MCWalkerConfigurationT& w; + WalkerIter_t repstart, repend; + IndexType direction, headindex, nbeads; + Walker_t* prophead; + + inline ReptileT(MCWalkerConfigurationT& W, WalkerIter_t start, WalkerIter_t end) + : w(W), + repstart(start), + repend(end), + direction(1), + headindex(0), + prophead(0) //, r2prop(0.0), r2accept(0.0),tau(0.0) + { + Action.resize(3); + Action[0] = w.addProperty("ActionBackward"); + Action[1] = w.addProperty("ActionForward"); + Action[2] = w.addProperty("ActionLocal"); + TransProb.resize(2); + TransProb[0] = w.addProperty("TransProbBackward"); + TransProb[1] = w.addProperty("TransProbForward"); + + nbeads = repend - repstart; + } + + ~ReptileT() {} + + inline IndexType size() { return nbeads; } + + inline Walker_t& operator[](IndexType i) { return getWalker(getBeadIndex(i)); } + + inline IndexType wrapIndex(IndexType repindex) { return (repindex % nbeads + nbeads) % nbeads; } + + inline Walker_t& getWalker(IndexType i) + { + WalkerIter_t bead = repstart + wrapIndex(i); + return **bead; + } + + inline IndexType getBeadIndex(IndexType i) { return wrapIndex(headindex + direction * i); } + inline Walker_t& getBead(IndexType i) { return getWalker(getBeadIndex(i)); } + inline Walker_t& getHead() { return getWalker(getBeadIndex(0)); } + inline Walker_t& getTail() { return getWalker(getBeadIndex(nbeads - 1)); } + inline Walker_t& getNext() { return getWalker(getBeadIndex(nbeads - 2)); } + inline Walker_t& getCenter() { return getWalker(getBeadIndex((nbeads - 1) / 2)); } + // inline void setProposedHead(){ + + inline void flip() + { + // direction*=-1; + // headindex = getBeadIndex(nbeads-1); + headindex = wrapIndex(headindex - direction); + direction *= -1; + } + + inline void setDirection(IndexType dir) { direction = dir; } + + inline void setBead(Walker_t& walker, IndexType i) + { + IndexType index = getBeadIndex(i); + Walker_t& newbead(getWalker(index)); + newbead = walker; // This should be a hard copy + } 
+ + inline void setHead(Walker_t& overwrite) + { + // overwrite last element. + headindex = getBeadIndex(nbeads - 1); // sets to position of tail. + Walker_t& newhead(getBead(0)); + newhead = overwrite; + } + // This function does two things: 1.) Moves the reptile forward 1 + // step. 2.) Returns the new head. + inline Walker_t& getNewHead() + { + // overwrite last element. + headindex = getBeadIndex(nbeads - 1); // sets to position of tail. + return getWalker(headindex); + } + + void saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType actionindex = 2; + if (direction != 0) + actionindex = (1 - d * direction) / 2; + walker.Properties(nPsi, Action[actionindex]) = val; + } + + RealType getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType actionindex = 2; + if (d != 0) + actionindex = (1 - direction * d) / 2; + + return walker.Properties(nPsi, Action[actionindex]); + } + + RealType getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d, IndexType nPsi = 0) + { + RealType af = getDirectionalAction(old_walker, +1, nPsi); + RealType ab = getDirectionalAction(new_walker, -1, nPsi); + RealType a0 = getDirectionalAction(old_walker, 0, nPsi) + getDirectionalAction(new_walker, 0, nPsi); + return af + ab + a0; + } + + void saveTransProb(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + walker.Properties(nPsi, TransProb[transindex]) = val; + } + + void saveTransProb(ParticleSetT& W, IndexType d, RealType val, IndexType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + W.Properties(nPsi, TransProb[transindex]) = val; + } + RealType getTransProb(Walker_t& walker, IndexType d, RealType 
nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + return walker.Properties(nPsi, TransProb[transindex]); + } + RealType getTransProb(ParticleSetT& W, IndexType d, RealType nPsi = 0) + { + // IndexType repdirection=circbuffer.get_direction(); + IndexType transindex = (1 - d * direction) / 2; + return W.Properties(nPsi, TransProb[transindex]); + } + + inline void printState() + { + app_log() << "********PRINT REPTILE STATE*********\n"; + app_log() << "Direction=" << direction << " Headindex=" << headindex << " tail=" << getBeadIndex(nbeads - 1) + << "\n next=" << getBeadIndex(nbeads - 2) << " nbeads=" << nbeads << std::endl; + app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]" + "\tAction[2]\t\n"; + for (int i = 0; i < nbeads; i++) + { + app_log() << i << "\t" << getBeadIndex(i) << "\t" << getBead(i).Properties(WP::LOCALENERGY) << "\t" + << getBead(i).Properties(Action[0]) << "\t" << getBead(i).Properties(Action[1]) << "\t" + << getBead(i).Properties(Action[2]) << "\n"; + } + app_log() << "POSITIONS===============:\n"; + for (int i = 0; i < nbeads; i++) + { + // app_log()<length of reptile, then return the last + // bead. if t<0; return the first bead. + inline typename Walker_t::ParticlePos linearInterp(RealType t) + { + IndexType nbead = IndexType(t / tau); // Calculate the lower bound on the timeslice. 
+ // t is between binnum*Tau and (binnum+1)Tau + RealType beadfrac = t / tau - nbead; // the fractional coordinate between n and n+1 bead + if (nbead <= 0) + { + typename ParticleSetT::ParticlePos result = getHead().R; + return result; + } + else if (nbead >= nbeads - 1) + { + typename ParticleSetT::ParticlePos result = getTail().R; + return result; + } + + else + { + typename Walker_t::ParticlePos dR(getBead(nbead + 1).R), interpR(getBead(nbead).R); + dR = dR - getBead(nbead).R; + + interpR = getBead(nbead).R + beadfrac * dR; + return interpR; + } + } + inline ReptileConfig_t getReptileSlicePositions(RealType tau, RealType beta) + { + IndexType nbeads_new = IndexType(beta / tau); + ReptileConfig_t new_reptile_coords(0); + + for (IndexType i = 0; i < nbeads_new; i++) + new_reptile_coords.push_back(linearInterp(tau * i)); + + return new_reptile_coords; + } + + inline void setReptileSlicePositions(ReptileConfig_t& rept) + { + if (rept.size() == nbeads) + { + for (int i = 0; i < nbeads; i++) + getBead(i).R = rept[i]; + } + else + ; + } + + inline void setReptileSlicePositions(typename Walker_t::ParticlePos R) + { + for (int i = 0; i < nbeads; i++) + getBead(i).R = R; + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SampleStack.h b/src/Particle/SampleStack.h index 3614f53558..9565ceac02 100644 --- a/src/Particle/SampleStack.h +++ b/src/Particle/SampleStack.h @@ -18,50 +18,12 @@ #ifndef QMCPLUSPLUS_SAMPLE_STACK_H #define QMCPLUSPLUS_SAMPLE_STACK_H -#include -#include "Particle/ParticleSet.h" -#include "Particle/MCSample.h" -#include "Particle/Walker.h" -#include "Particle/WalkerConfigurations.h" +#include "Configuration.h" +#include "Particle/SampleStackT.h" namespace qmcplusplus { -class SampleStack -{ -public: - using PropertySetType = QMCTraits::PropertySetType; - - size_t getMaxSamples() const { return max_samples_; } - - bool empty() const { return sample_vector_.empty(); } - - const MCSample& getSample(size_t i) const; - - 
//@{save/load/clear function for optimization - inline size_t getNumSamples() const { return current_sample_count_; } - ///set the number of max samples per rank. - void setMaxSamples(size_t n, size_t number_of_ranks = 1); - /// Global number of samples is number of samples per rank * number of ranks - size_t getGlobalNumSamples() const { return global_num_samples_; } - /// load a single sample from SampleStack - void loadSample(ParticleSet& pset, size_t iw) const; - - void appendSample(MCSample&& sample); - - ///clear the ensemble - void clearEnsemble(); - //@} - /// Set the sample count to zero but preserve the storage - void resetSampleCount(); - -private: - size_t max_samples_{10}; - size_t current_sample_count_{0}; - size_t global_num_samples_{max_samples_}; - - std::vector sample_vector_; -}; - +using SampleStack = SampleStackT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SampleStack.cpp b/src/Particle/SampleStackT.cpp similarity index 61% rename from src/Particle/SampleStack.cpp rename to src/Particle/SampleStackT.cpp index c2720b9b8c..16c3628877 100644 --- a/src/Particle/SampleStack.cpp +++ b/src/Particle/SampleStackT.cpp @@ -9,8 +9,8 @@ // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "SampleStackT.h" -#include "SampleStack.h" #include "Utilities/IteratorUtility.h" namespace qmcplusplus @@ -20,17 +20,23 @@ namespace qmcplusplus * @param n number of samples per rank * @param num_ranks number of ranks. Used to set global number of samples. 
*/ -void SampleStack::setMaxSamples(size_t n, size_t num_ranks) +template +void SampleStackT::setMaxSamples(size_t n, size_t num_ranks) { - max_samples_ = n; - global_num_samples_ = n * num_ranks; + max_samples_ = n; + global_num_samples_ = n * num_ranks; current_sample_count_ = std::min(current_sample_count_, max_samples_); sample_vector_.resize(n, MCSample(0)); } -const MCSample& SampleStack::getSample(size_t i) const { return sample_vector_[i]; } +template +const MCSample& SampleStackT::getSample(size_t i) const +{ + return sample_vector_[i]; +} -void SampleStack::appendSample(MCSample&& sample) +template +void SampleStackT::appendSample(MCSample&& sample) { // Ignore samples in excess of the expected number of samples if (current_sample_count_ < max_samples_) @@ -42,19 +48,29 @@ void SampleStack::appendSample(MCSample&& sample) /** load a single sample from SampleStack */ -void SampleStack::loadSample(ParticleSet& pset, size_t iw) const +template +void SampleStackT::loadSample(ParticleSetT& pset, size_t iw) const { pset.R = sample_vector_[iw].R; pset.spins = sample_vector_[iw].spins; } -void SampleStack::clearEnsemble() +template +void SampleStackT::clearEnsemble() { sample_vector_.clear(); current_sample_count_ = 0; } -void SampleStack::resetSampleCount() { current_sample_count_ = 0; } +template +void SampleStackT::resetSampleCount() +{ + current_sample_count_ = 0; +} +template class SampleStackT; +template class SampleStackT; +template class SampleStackT>; +template class SampleStackT>; } // namespace qmcplusplus diff --git a/src/Particle/SampleStackT.h b/src/Particle/SampleStackT.h new file mode 100644 index 0000000000..9baf8a018b --- /dev/null +++ b/src/Particle/SampleStackT.h @@ -0,0 +1,62 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. 
+// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SAMPLE_STACKT_H +#define QMCPLUSPLUS_SAMPLE_STACKT_H + +#include "Particle/MCSample.h" +#include "Particle/ParticleSetT.h" +#include "Particle/Walker.h" +#include "Particle/WalkerConfigurations.h" + +#include + +namespace qmcplusplus +{ +template +class SampleStackT +{ +public: + using PropertySetType = typename ParticleSetTraits::PropertySetType; + + size_t getMaxSamples() const { return max_samples_; } + + bool empty() const { return sample_vector_.empty(); } + + const MCSample& getSample(size_t i) const; + + //@{save/load/clear function for optimization + inline size_t getNumSamples() const { return current_sample_count_; } + /// set the number of max samples per rank. 
+ void setMaxSamples(size_t n, size_t number_of_ranks = 1); + /// Global number of samples is number of samples per rank * number of ranks + size_t getGlobalNumSamples() const { return global_num_samples_; } + /// load a single sample from SampleStack + void loadSample(ParticleSetT& pset, size_t iw) const; + + void appendSample(MCSample&& sample); + + /// clear the ensemble + void clearEnsemble(); + //@} + /// Set the sample count to zero but preserve the storage + void resetSampleCount(); + +private: + size_t max_samples_{10}; + size_t current_sample_count_{0}; + size_t global_num_samples_{max_samples_}; + + std::vector sample_vector_; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SimulationCell.h b/src/Particle/SimulationCell.h index 87f630812f..f632846d99 100644 --- a/src/Particle/SimulationCell.h +++ b/src/Particle/SimulationCell.h @@ -14,41 +14,11 @@ #define QMCPLUSPLUS_SIMULATIONCELL_H #include "Configuration.h" -#include "LongRange/KContainer.h" +#include "SimulationCellT.h" namespace qmcplusplus { -class ParticleSetPool; +using SimulationCell = SimulationCellT; -class SimulationCell -{ -public: - using Lattice = PtclOnLatticeTraits::ParticleLayout; - - SimulationCell(); - SimulationCell(const Lattice& lattice); - - const Lattice& getLattice() const { return lattice_; } - const Lattice& getPrimLattice() const { return primative_lattice_; } - const Lattice& getLRBox() const { return LRBox_; } - - void resetLRBox(); - - /// access k_lists_ read only - const KContainer& getKLists() const { return k_lists_; } - -private: - ///simulation cell lattice - Lattice lattice_; - ///Primative cell lattice - Lattice primative_lattice_; - ///long-range box - Lattice LRBox_; - - /// K-Vector List. 
- KContainer k_lists_; - - friend class ParticleSetPool; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SimulationCell.cpp b/src/Particle/SimulationCellT.cpp similarity index 76% rename from src/Particle/SimulationCell.cpp rename to src/Particle/SimulationCellT.cpp index cbd9b5f053..aa8db95db7 100644 --- a/src/Particle/SimulationCell.cpp +++ b/src/Particle/SimulationCellT.cpp @@ -9,26 +9,27 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// - -#include "SimulationCell.h" +#include "SimulationCellT.h" +#include "Platforms/Host/OutputManager.h" namespace qmcplusplus { +template +SimulationCellT::SimulationCellT() = default; -SimulationCell::SimulationCell() = default; - -SimulationCell::SimulationCell(const Lattice& lattice) - : lattice_(lattice) +template +SimulationCellT::SimulationCellT(const Lattice& lattice) : lattice_(lattice) { resetLRBox(); } -void SimulationCell::resetLRBox() +template +void SimulationCellT::resetLRBox() { if (lattice_.SuperCellEnum != SUPERCELL_OPEN) { lattice_.SetLRCutoffs(lattice_.Rv); - LRBox_ = lattice_; + LRBox_ = lattice_; bool changed = false; if (lattice_.SuperCellEnum == SUPERCELL_SLAB && lattice_.VacuumScale != 1.0) { @@ -62,4 +63,9 @@ void SimulationCell::resetLRBox() k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim); } } -} + +template class SimulationCellT; +template class SimulationCellT; +template class SimulationCellT>; +template class SimulationCellT>; +} // namespace qmcplusplus diff --git a/src/Particle/SimulationCellT.h b/src/Particle/SimulationCellT.h new file mode 100644 index 0000000000..279d9b0fc5 --- /dev/null +++ b/src/Particle/SimulationCellT.h @@ -0,0 +1,55 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. 
+// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SIMULATIONCELLT_H +#define QMCPLUSPLUS_SIMULATIONCELLT_H + +#include "LongRange/KContainerT.h" +#include "ParticleSetTraits.h" + +namespace qmcplusplus +{ +template +class ParticleSetPoolT; + +template +class SimulationCellT +{ +public: + using Lattice = typename LatticeParticleTraits::ParticleLayout; + + SimulationCellT(); + SimulationCellT(const Lattice& lattice); + + const Lattice& getLattice() const { return lattice_; } + const Lattice& getPrimLattice() const { return primative_lattice_; } + const Lattice& getLRBox() const { return LRBox_; } + + void resetLRBox(); + + /// access k_lists_ read only + const KContainerT& getKLists() const { return k_lists_; } + +private: + /// simulation cell lattice + Lattice lattice_; + /// Primative cell lattice + Lattice primative_lattice_; + /// long-range box + Lattice LRBox_; + + /// K-Vector List. 
+ KContainerT k_lists_; + + friend class ParticleSetPoolT; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableAA.h b/src/Particle/SoaDistanceTableAA.h index ec9e8315ca..d8ecec84e8 100644 --- a/src/Particle/SoaDistanceTableAA.h +++ b/src/Particle/SoaDistanceTableAA.h @@ -13,197 +13,11 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AA_H #define QMCPLUSPLUS_DTDIMPL_AA_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" +#include "Particle/SoaDistanceTableAAT.h" namespace qmcplusplus { -/**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for dense case - */ template -struct SoaDistanceTableAA : public DTD_BConds, public DistanceTableAA -{ - /// actual memory for dist and displacements_ - aligned_vector memory_pool_; - - SoaDistanceTableAA(ParticleSet& target) - : DTD_BConds(target.getLattice()), - DistanceTableAA(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(num_targets_)), -#if !defined(NDEBUG) - old_prepared_elec_id_(-1), -#endif - evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") + target.getName() + "_" + target.getName(), - timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAA::move_") + target.getName() + "_" + target.getName(), - timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAA::update_") + target.getName() + "_" + target.getName(), - timer_level_fine)) - { - resize(); - } - - SoaDistanceTableAA() = delete; - SoaDistanceTableAA(const SoaDistanceTableAA&) = delete; - ~SoaDistanceTableAA() override {} - - size_t compute_size(int N) const - { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; - } - - void resize() - { - // initialize memory containers and views - const size_t total_size = compute_size(num_targets_); - memory_pool_.resize(total_size * (1 + D)); - distances_.resize(num_targets_); - 
displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); - displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); - } - - old_r_.resize(num_targets_); - old_dr_.resize(num_targets_); - temp_r_.resize(num_targets_); - temp_dr_.resize(num_targets_); - } - - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); - constexpr T BigR = std::numeric_limits::max(); - for (int iat = 1; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), - displacements_[iat], 0, iat, iat); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - -#if !defined(NDEBUG) - old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, 0, - num_targets_, iat); - // set up old_r_ and old_dr_ for moves may get accepted. 
- if (prepare_old) - { - //recompute from scratch - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), old_r_.data(), old_dr_, - 0, num_targets_, iat); - old_r_[iat] = std::numeric_limits::max(); //assign a big number - } - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - //ensure there are neighbors - assert(num_targets_ > 1); - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_targets_; ++jat) - if (temp_r_[jat] < min_dist && jat != iat) - { - min_dist = temp_r_[jat]; - index = jat; - } - assert(index >= 0); - dr = temp_dr_[index]; - } - else - { - for (int jat = 0; jat < iat; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - for (int jat = iat + 1; jat < num_targets_; ++jat) - if (distances_[jat][iat] < min_dist) - { - min_dist = distances_[jat][iat]; - index = jat; - } - assert(index != iat && index >= 0); - if (index < iat) - dr = displacements_[iat][index]; - else - dr = displacements_[index][iat]; - } - r = min_dist; - return index; - } - - /** After accepting the iat-th particle, update the iat-th row of distances_ and displacements_. 
- * Upper triangle is not needed in the later computation and thus not updated - */ - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - //update [0, iat) - const int nupdate = iat; - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[iat].data(idim)); - //copy column - for (size_t i = iat + 1; i < num_targets_; ++i) - { - distances_[i][iat] = temp_r_[i]; - displacements_[i](iat) = -temp_dr_[i]; - } - } - - void updatePartial(IndexType jat, bool from_temp) override - { - ScopedTimer local_timer(update_timer_); - //update [0, jat) - const int nupdate = jat; - if (from_temp) - { - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - else - { - assert(old_prepared_elec_id_ == jat); - //copy row - assert(nupdate <= old_r_.size()); - std::copy_n(old_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(old_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - } - -private: - ///number of targets with padding - const size_t num_targets_padded_; -#if !defined(NDEBUG) - /** set to particle id after move() with prepare_old = true. -1 means not prepared. - * It is intended only for safety checks, not for codepath selection. 
- */ - int old_prepared_elec_id_; -#endif - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; +using SoaDistanceTableAA = SoaDistanceTableAAT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableAAOMPTarget.h b/src/Particle/SoaDistanceTableAAOMPTarget.h index d5b8c5f1da..5eb91e236e 100644 --- a/src/Particle/SoaDistanceTableAAOMPTarget.h +++ b/src/Particle/SoaDistanceTableAAOMPTarget.h @@ -14,13 +14,7 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" -#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableAATOMPTarget.h" namespace qmcplusplus { @@ -28,480 +22,7 @@ namespace qmcplusplus * @brief A derived classe from DistacneTableData, specialized for dense case */ template -struct SoaDistanceTableAAOMPTarget : public DTD_BConds, public DistanceTableAA -{ - /// actual memory for dist and displacements_ - aligned_vector memory_pool_; - - /// actual memory for temp_r_ - DistRow temp_r_mem_; - /// actual memory for temp_dr_ - DisplRow temp_dr_mem_; - /// actual memory for old_r_ - DistRow old_r_mem_; - /// actual memory for old_dr_ - DisplRow old_dr_mem_; - - ///multi walker shared memory buffer - struct DTAAMultiWalkerMem : public Resource - { - ///dist displ for temporary and old pairs - Vector>> mw_new_old_dist_displ; - - /** distances from a range of indics to the source. 
- * for original particle index i (row) and source particle id j (col) - * j < i, the element data is dist(r_i - r_j) - * j > i, the element data is dist(r_(n - 1 - i) - r_(n - 1 - j)) - */ - Vector>> mw_distances_subset; - - DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") {} - - DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - SoaDistanceTableAAOMPTarget(ParticleSet& target) - : DTD_BConds(target.getLattice()), - DistanceTableAA(target, DTModes::ALL_OFF), - num_targets_padded_(getAlignedSize(num_targets_)), -#if !defined(NDEBUG) - old_prepared_elec_id_(-1), -#endif - offload_timer_(createGlobalTimer(std::string("DTAAOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTAAOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAAOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAAOMPTarget::update_") + name_, timer_level_fine)) - - { - auto* coordinates_soa = dynamic_cast(&target.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. 
Contact developers!"); - resize(); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - } - - SoaDistanceTableAAOMPTarget() = delete; - SoaDistanceTableAAOMPTarget(const SoaDistanceTableAAOMPTarget&) = delete; - ~SoaDistanceTableAAOMPTarget(){PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")} - - size_t compute_size(int N) const - { - const size_t num_padded = getAlignedSize(N); - const size_t Alignment = getAlignment(); - return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; - } - - void resize() - { - // initialize memory containers and views - const size_t total_size = compute_size(num_targets_); - memory_pool_.resize(total_size * (1 + D)); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); - displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); - } - - old_r_mem_.resize(num_targets_); - old_dr_mem_.resize(num_targets_); - temp_r_mem_.resize(num_targets_); - temp_dr_mem_.resize(num_targets_); - } - - const RealType* getMultiWalkerTempDataPtr() const override - { - return mw_mem_handle_.getResource().mw_new_old_dist_displ.data(); - } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - - auto& mw_new_old_dist_displ = 
dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ; - mw_new_old_dist_displ.resize(nw * 2 * stride_size); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * iw, num_targets_padded_); - dt.temp_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * iw + num_targets_padded_); - dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw), num_targets_padded_); - dt.old_dr_.attachReference(num_targets_, num_targets_padded_, - mw_new_old_dist_displ.data() + stride_size * (iw + nw) + num_targets_padded_); - } - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - const size_t nw = dt_list.size(); - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.temp_r_.free(); - dt.temp_dr_.free(); - dt.old_r_.free(); - dt.old_dr_.free(); - } - } - - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); - - constexpr T BigR = std::numeric_limits::max(); - for (int iat = 1; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), distances_[iat].data(), - displacements_[iat], 0, iat, iat); - } - - /** compute distances from particles in [range_begin, range_end) to all the particles. - * Although [range_begin, range_end) and be any particle [0, num_sources), it is only necessary to compute - * half of the table due to the symmetry of AA table. See note of the output data object mw_distances_subset - * To keep resident memory minimal on the device, range_end - range_begin < num_particls_stored is required. 
- */ - const RealType* mw_evalDistsInRange(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - size_t range_begin, - size_t range_end) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - const size_t subset_size = range_end - range_begin; - if (subset_size > dt_leader.num_particls_stored) - throw std::runtime_error("not enough internal buffer"); - - ScopedTimer local_timer(dt_leader.evaluate_timer_); - - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - const size_t nw = dt_list.size(); - const auto num_sources_local = dt_leader.num_targets_; - const auto num_padded = dt_leader.num_targets_padded_; - mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* dist_ranged = mw_mem.mw_distances_subset.data(); - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams)") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - { - for (int irow = 0; irow < subset_size; irow++) - { - T* dist = dist_ranged + (irow + subset_size * iw) * num_padded; - size_t id_target = irow + range_begin; +using SoaDistanceTableAAOMPTarget = SoaDistanceTableAATOMPTarget; - T dx, dy, dz; - if (id_target < iel) - { - dx = source_pos_ptr[id_target] - source_pos_ptr[iel]; - dy = source_pos_ptr[id_target + num_padded] - 
source_pos_ptr[iel + num_padded]; - dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2]; - } - else - { - const size_t id_target_reverse = num_sources_local - 1 - id_target; - const size_t iel_reverse = num_sources_local - 1 - iel; - dx = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse]; - dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded]; - dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2]; - } - - dist[iel] = DTD_BConds::computeDist(dx, dy, dz); - } - } - } - } - return mw_mem.mw_distances_subset.data(); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - -#if !defined(NDEBUG) - old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size()); - temp_dr_.attachReference(temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data()); - - assert((prepare_old && iat >= 0 && iat < num_targets_) || !prepare_old); - DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, 0, - num_targets_, iat); - // set up old_r_ and old_dr_ for moves may get accepted. - if (prepare_old) - { - old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size()); - old_dr_.attachReference(old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data()); - //recompute from scratch - DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), old_r_.data(), old_dr_, - 0, num_targets_, iat); - old_r_[iat] = std::numeric_limits::max(); //assign a big number - } - } - - /** evaluate the temporary pair relations when a move is proposed - * this implementation is asynchronous and the synchronization is managed at ParticleSet. 
- * Transferring results to host depends on DTModes::NEED_TEMP_DATA_ON_HOST. - * If the temporary pair distance are consumed on the device directly, the device to host data transfer can be - * skipped as an optimization. - */ - void mw_move(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& rnew_list, - const IndexType iat, - bool prepare_old = true) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& pset_leader = p_list.getLeader(); - - ScopedTimer local_timer(move_timer_); - const size_t nw = dt_list.size(); - const size_t stride_size = num_targets_padded_ * (D + 1); - - auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ; - - for (int iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); -#if !defined(NDEBUG) - dt.old_prepared_elec_id_ = prepare_old ? iat : -1; -#endif - auto& coordinates_soa = static_cast(p_list[iw].getCoordinates()); - } - - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto& coordinates_leader = static_cast(pset_leader.getCoordinates()); - - const auto num_sources_local = num_targets_; - const auto num_padded = num_targets_padded_; - auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); - auto* r_dr_ptr = mw_new_old_dist_displ.data(); - auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data(); - const size_t new_pos_stride = coordinates_leader.getFusedNewPosBuffer().capacity(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(nw * num_teams) \ - nowait depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])") - for (int iw = 0; iw < nw; ++iw) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; - const size_t first = 
ChunkSizePerTeam * team_id; - const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - { // temp - auto* r_iw_ptr = r_dr_ptr + iw * stride_size; - auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = new_pos_ptr[idim * new_pos_stride + iw]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - } - - if (prepare_old) - { // old - auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size; - auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded; - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = source_pos_ptr[idim * num_padded + iat]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, - num_padded, iel, iat); - r_iw_ptr[iat] = std::numeric_limits::max(); //assign a big number - } - } - } - - if (modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) - { - PRAGMA_OFFLOAD("omp target update nowait depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ - from(r_dr_ptr[:mw_new_old_dist_displ.size()])") - } - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - //ensure there are neighbors - assert(num_targets_ > 1); - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_targets_; ++jat) - if (temp_r_[jat] < min_dist && jat != iat) - { - min_dist = temp_r_[jat]; - index = jat; - } - assert(index >= 0); - dr = temp_dr_[index]; - } - else - { - for (int jat = 0; jat < iat; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - for (int jat = iat + 1; jat < num_targets_; ++jat) - if (distances_[jat][iat] < min_dist) - { - min_dist = 
distances_[jat][iat]; - index = jat; - } - assert(index != iat && index >= 0); - if (index < iat) - dr = displacements_[iat][index]; - else - dr = displacements_[index][iat]; - } - r = min_dist; - return index; - } - - /** After accepting the iat-th particle, update the iat-th row of distances_ and displacements_. - * Upper triangle is not needed in the later computation and thus not updated - */ - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - //update [0, iat) columns - const int nupdate = iat; - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[iat].data(idim)); - //copy column - for (size_t i = iat + 1; i < num_targets_; ++i) - { - distances_[i][iat] = temp_r_[i]; - displacements_[i](iat) = -temp_dr_[i]; - } - } - - void updatePartial(IndexType jat, bool from_temp) override - { - ScopedTimer local_timer(update_timer_); - - //update [0, jat) - const int nupdate = jat; - if (from_temp) - { - //copy row - assert(nupdate <= temp_r_.size()); - std::copy_n(temp_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - else - { - assert(old_prepared_elec_id_ == jat); - //copy row - assert(nupdate <= old_r_.size()); - std::copy_n(old_r_.data(), nupdate, distances_[jat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(old_dr_.data(idim), nupdate, displacements_[jat].data(idim)); - } - } - - void mw_updatePartial(const RefVectorWithLeader& dt_list, - IndexType jat, - const std::vector& from_temp) override - { - // if temp data on host is not updated by mw_move during p-by-p moves, there is no need to update distance table - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)) - return; - - for (int iw = 0; iw < dt_list.size(); iw++) - 
dt_list[iw].updatePartial(jat, from_temp[iw]); - } - - void mw_finalizePbyP(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - // if the distance table is not updated by mw_move during p-by-p, needs to recompute the whole table - // before being used by Hamiltonian if requested - if (!(modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) && (modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)) - mw_evaluate(dt_list, p_list); - } - - size_t get_num_particls_stored() const override { return num_particls_stored; } - -private: - ///number of targets with padding - const size_t num_targets_padded_; -#if !defined(NDEBUG) - /** set to particle id after move() with prepare_old = true. -1 means not prepared. - * It is intended only for safety checks, not for codepath selection. - */ - int old_prepared_elec_id_; -#endif - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; - /// the particle count of the internal stored distances. - const size_t num_particls_stored = 64; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableAAT.h b/src/Particle/SoaDistanceTableAAT.h new file mode 100644 index 0000000000..d86305e8f1 --- /dev/null +++ b/src/Particle/SoaDistanceTableAAT.h @@ -0,0 +1,218 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_AAT_H +#define QMCPLUSPLUS_DTDIMPL_AAT_H + +#include "CPU/SIMD/algorithm.hpp" +#include "Lattice/ParticleBConds3DSoa.h" +#include "Particle/DistanceTableT.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived classe from DistacneTableData, specialized for dense case + */ +template +struct SoaDistanceTableAAT : public DTD_BConds::RealType, D, SC>, + public DistanceTableAAT +{ + using RealType = typename DistanceTableAAT::RealType; + using PosType = typename DistanceTableAAT::PosType; + using IndexType = typename DistanceTableAAT::IndexType; + + /// actual memory for dist and displacements_ + aligned_vector memory_pool_; + + SoaDistanceTableAAT(ParticleSetT& target) + : DTD_BConds(target.getLattice()), + DistanceTableAAT(target, DTModes::ALL_OFF), + num_targets_padded_(getAlignedSize(this->num_targets_)), +#if !defined(NDEBUG) + old_prepared_elec_id_(-1), +#endif + evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") + target.getName() + "_" + target.getName(), + timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTAA::move_") + target.getName() + "_" + target.getName(), + timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTAA::update_") + target.getName() + "_" + target.getName(), + timer_level_fine)) + { + resize(); + } + + SoaDistanceTableAAT() = delete; + SoaDistanceTableAAT(const SoaDistanceTableAAT&) = delete; + ~SoaDistanceTableAAT() override {} + + size_t compute_size(int N) const + { + const size_t num_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; + } + + void resize() + { + // initialize memory containers and views + const size_t total_size = compute_size(this->num_targets_); + memory_pool_.resize(total_size * (1 + D)); + 
this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) + { + this->distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); + this->displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); + } + + this->old_r_.resize(this->num_targets_); + this->old_dr_.resize(this->num_targets_); + this->temp_r_.resize(this->num_targets_); + this->temp_dr_.resize(this->num_targets_); + } + + inline void evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); + constexpr RealType BigR = std::numeric_limits::max(); + for (int iat = 1; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, iat, + iat); + } + + /// evaluate the temporary pair relations + inline void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + +#if !defined(NDEBUG) + old_prepared_elec_id_ = prepare_old ? iat : -1; +#endif + DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), this->temp_r_.data(), + this->temp_dr_, 0, this->num_targets_, iat); + // set up old_r_ and old_dr_ for moves may get accepted. 
+ if (prepare_old) + { + // recompute from scratch + DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), + this->old_r_.data(), this->old_dr_, 0, this->num_targets_, iat); + this->old_r_[iat] = std::numeric_limits::max(); // assign a big number + } + } + + int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + // ensure there are neighbors + assert(this->num_targets_ > 1); + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) + { + for (int jat = 0; jat < this->num_targets_; ++jat) + if (this->temp_r_[jat] < min_dist && jat != iat) + { + min_dist = this->temp_r_[jat]; + index = jat; + } + assert(index >= 0); + dr = this->temp_dr_[index]; + } + else + { + for (int jat = 0; jat < iat; ++jat) + if (this->distances_[iat][jat] < min_dist) + { + min_dist = this->distances_[iat][jat]; + index = jat; + } + for (int jat = iat + 1; jat < this->num_targets_; ++jat) + if (this->distances_[jat][iat] < min_dist) + { + min_dist = this->distances_[jat][iat]; + index = jat; + } + assert(index != iat && index >= 0); + if (index < iat) + dr = this->displacements_[iat][index]; + else + dr = this->displacements_[index][iat]; + } + r = min_dist; + return index; + } + + /** After accepting the iat-th particle, update the iat-th row of distances_ + * and displacements_. 
Upper triangle is not needed in the later computation + * and thus not updated + */ + inline void update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + // update [0, iat) + const int nupdate = iat; + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n(this->temp_r_.data(), nupdate, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[iat].data(idim)); + // copy column + for (size_t i = iat + 1; i < this->num_targets_; ++i) + { + this->distances_[i][iat] = this->temp_r_[i]; + this->displacements_[i](iat) = -this->temp_dr_[i]; + } + } + + void updatePartial(IndexType jat, bool from_temp) override + { + ScopedTimer local_timer(update_timer_); + // update [0, jat) + const int nupdate = jat; + if (from_temp) + { + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n(this->temp_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[jat].data(idim)); + } + else + { + assert(old_prepared_elec_id_ == jat); + // copy row + assert(nupdate <= this->old_r_.size()); + std::copy_n(this->old_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->old_dr_.data(idim), nupdate, this->displacements_[jat].data(idim)); + } + } + +private: + /// number of targets with padding + const size_t num_targets_padded_; +#if !defined(NDEBUG) + /** set to particle id after move() with prepare_old = true. -1 means not + * prepared. It is intended only for safety checks, not for codepath + * selection. 
+ */ + int old_prepared_elec_id_; +#endif + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableAATOMPTarget.h b/src/Particle/SoaDistanceTableAATOMPTarget.h new file mode 100644 index 0000000000..8e5835f412 --- /dev/null +++ b/src/Particle/SoaDistanceTableAATOMPTarget.h @@ -0,0 +1,532 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H +#define QMCPLUSPLUS_DTDIMPL_AAT_OMPTARGET_H + +#include "CPU/SIMD/algorithm.hpp" +#include "DistanceTableT.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "OMPTarget/OMPallocator.hpp" +#include "Particle/RealSpacePositionsTOMPTarget.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived classe from DistacneTableData, specialized for dense case + */ +template +struct SoaDistanceTableAATOMPTarget : public DTD_BConds::RealType, D, SC>, + public DistanceTableAAT +{ + using RealType = typename DistanceTableAAT::RealType; + using PosType = typename DistanceTableAAT::PosType; + using IndexType = typename DistanceTableAAT::IndexType; + using DistRow = typename DistanceTableAAT::DistRow; + using 
DisplRow = typename DistanceTableAAT::DisplRow; + + /// actual memory for dist and displacements_ + aligned_vector memory_pool_; + + /// actual memory for temp_r_ + DistRow temp_r_mem_; + /// actual memory for temp_dr_ + DisplRow temp_dr_mem_; + /// actual memory for old_r_ + DistRow old_r_mem_; + /// actual memory for old_dr_ + DisplRow old_dr_mem_; + + /// multi walker shared memory buffer + struct DTAAMultiWalkerMem : public Resource + { + /// dist displ for temporary and old pairs + Vector>> mw_new_old_dist_displ; + + /** distances from a range of indics to the source. + * for original particle index i (row) and source particle id j (col) + * j < i, the element data is dist(r_i - r_j) + * j > i, the element data is dist(r_(n - 1 - i) - r_(n - 1 - j)) + */ + Vector>> mw_distances_subset; + + DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") {} + + DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + }; + + ResourceHandle mw_mem_handle_; + + SoaDistanceTableAATOMPTarget(ParticleSetT& target) + : DTD_BConds(target.getLattice()), + DistanceTableAAT(target, DTModes::ALL_OFF), + num_targets_padded_(getAlignedSize(this->num_targets_)), +#if !defined(NDEBUG) + old_prepared_elec_id_(-1), +#endif + offload_timer_(createGlobalTimer(std::string("DTAAOMPTarget::offload_") + this->name_, timer_level_fine)), + evaluate_timer_(createGlobalTimer(std::string("DTAAOMPTarget::evaluate_") + this->name_, timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTAAOMPTarget::move_") + this->name_, timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTAAOMPTarget::update_") + this->name_, timer_level_fine)) + + { + auto* coordinates_soa = dynamic_cast*>(&target.getCoordinates()); + if (!coordinates_soa) + throw std::runtime_error("Source particle set doesn't have OpenMP " + "offload. 
Contact developers!"); + resize(); + PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") + } + + SoaDistanceTableAATOMPTarget() = delete; + SoaDistanceTableAATOMPTarget(const SoaDistanceTableAATOMPTarget&) = delete; + ~SoaDistanceTableAATOMPTarget(){PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")} + + size_t compute_size(int N) const + { + const size_t num_padded = getAlignedSize(N); + const size_t Alignment = getAlignment(); + return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2; + } + + void resize() + { + // initialize memory containers and views + const size_t total_size = compute_size(this->num_targets_); + memory_pool_.resize(total_size * (1 + D)); + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) + { + this->distances_[i].attachReference(memory_pool_.data() + compute_size(i), i); + this->displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i)); + } + + old_r_mem_.resize(this->num_targets_); + old_dr_mem_.resize(this->num_targets_); + temp_r_mem_.resize(this->num_targets_); + temp_dr_mem_.resize(this->num_targets_); + } + + const RealType* getMultiWalkerTempDataPtr() const override + { + return mw_mem_handle_.getResource().mw_new_old_dist_displ.data(); + } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = dt_list.template getCastedLeader(); + dt_leader.mw_mem_handle_ = collection.lendResource(); + const size_t nw = dt_list.size(); + const size_t stride_size = num_targets_padded_ * (D + 1); + + for (int iw = 0; iw < nw; iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + dt.temp_r_.free(); + 
dt.temp_dr_.free(); + dt.old_r_.free(); + dt.old_dr_.free(); + } + + auto& mw_new_old_dist_displ = dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ; + mw_new_old_dist_displ.resize(nw * 2 * stride_size); + for (int iw = 0; iw < nw; iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + dt.temp_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * iw, num_targets_padded_); + dt.temp_dr_.attachReference(this->num_targets_, num_targets_padded_, + mw_new_old_dist_displ.data() + stride_size * iw + num_targets_padded_); + dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw), num_targets_padded_); + dt.old_dr_.attachReference(this->num_targets_, num_targets_padded_, + mw_new_old_dist_displ.data() + stride_size * (iw + nw) + num_targets_padded_); + } + } + + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + collection.takebackResource(dt_list.template getCastedLeader().mw_mem_handle_); + const size_t nw = dt_list.size(); + for (int iw = 0; iw < nw; iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + dt.temp_r_.free(); + dt.temp_dr_.free(); + dt.old_r_.free(); + dt.old_dr_.free(); + } + } + + inline void evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); + + constexpr T BigR = std::numeric_limits::max(); + for (int iat = 1; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, iat, + iat); + } + + /** compute distances from particles in [range_begin, range_end) to all the + * particles. Although [range_begin, range_end) can be any particle [0, + * num_sources), it is only necessary to compute half of the table due to + * the symmetry of AA table. 
See note of the output data object + * mw_distances_subset To keep resident memory minimal on the device, + * range_end - range_begin < num_particls_stored is required. + */ + const RealType* mw_evalDistsInRange(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + size_t range_begin, + size_t range_end) const override + { + auto& dt_leader = dt_list.template getCastedLeader(); + const size_t subset_size = range_end - range_begin; + if (subset_size > dt_leader.num_particls_stored) + throw std::runtime_error("not enough internal buffer"); + + ScopedTimer local_timer(dt_leader.evaluate_timer_); + + DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& pset_leader = p_list.getLeader(); + + const size_t nw = dt_list.size(); + const auto num_sources_local = dt_leader.num_targets_; + const auto num_padded = dt_leader.num_targets_padded_; + mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); + + const int ChunkSizePerTeam = 512; + const size_t num_teams = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto& coordinates_leader = static_cast&>(pset_leader.getCoordinates()); + + auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); + auto* dist_ranged = mw_mem.mw_distances_subset.data(); + { + ScopedTimer offload(dt_leader.offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(nw * num_teams)") + for (int iw = 0; iw < nw; ++iw) + for (int team_id = 0; team_id < num_teams; team_id++) + { + auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + { + for (int irow = 0; irow < subset_size; irow++) + { + RealType* dist = dist_ranged + (irow + subset_size * iw) * num_padded; + size_t id_target = irow + range_begin; + + RealType dx, dy, dz; + if 
(id_target < iel) + { + dx = source_pos_ptr[id_target] - source_pos_ptr[iel]; + dy = source_pos_ptr[id_target + num_padded] - source_pos_ptr[iel + num_padded]; + dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2]; + } + else + { + const size_t id_target_reverse = num_sources_local - 1 - id_target; + const size_t iel_reverse = num_sources_local - 1 - iel; + dx = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse]; + dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded]; + dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2]; + } + + dist[iel] = DTD_BConds::computeDist(dx, dy, dz); + } + } + } + } + return mw_mem.mw_distances_subset.data(); + } + + /// evaluate the temporary pair relations + inline void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + +#if !defined(NDEBUG) + old_prepared_elec_id_ = prepare_old ? iat : -1; +#endif + this->temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size()); + this->temp_dr_.attachReference(temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data()); + + assert((prepare_old && iat >= 0 && iat < this->num_targets_) || !prepare_old); + DTD_BConds::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), this->temp_r_.data(), + this->temp_dr_, 0, this->num_targets_, iat); + // set up old_r_ and old_dr_ for moves may get accepted. 
+ if (prepare_old) + { + this->old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size()); + this->old_dr_.attachReference(old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data()); + // recompute from scratch + DTD_BConds::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(), + this->old_r_.data(), this->old_dr_, 0, this->num_targets_, iat); + this->old_r_[iat] = std::numeric_limits::max(); // assign a big number + } + } + + /** evaluate the temporary pair relations when a move is proposed + * this implementation is asynchronous and the synchronization is managed at + * ParticleSet. Transferring results to host depends on + * DTModes::NEED_TEMP_DATA_ON_HOST. If the temporary pair distances are + * consumed on the device directly, the device to host data transfer can be + * skipped as an optimization. + */ + void mw_move(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& rnew_list, + const IndexType iat, + bool prepare_old = true) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = dt_list.template getCastedLeader(); + DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& pset_leader = p_list.getLeader(); + + ScopedTimer local_timer(move_timer_); + const size_t nw = dt_list.size(); + const size_t stride_size = num_targets_padded_ * (D + 1); + + auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ; + + for (int iw = 0; iw < nw; iw++) + { + auto& dt = dt_list.template getCastedElement(iw); +#if !defined(NDEBUG) + dt.old_prepared_elec_id_ = prepare_old ? 
iat : -1; +#endif + auto& coordinates_soa = static_cast&>(p_list[iw].getCoordinates()); + } + + const int ChunkSizePerTeam = 512; + const size_t num_teams = (this->num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto& coordinates_leader = static_cast&>(pset_leader.getCoordinates()); + + const auto num_sources_local = this->num_targets_; + const auto num_padded = num_targets_padded_; + const auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data(); + auto* r_dr_ptr = mw_new_old_dist_displ.data(); + const auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data(); + const size_t new_pos_stride = coordinates_leader.getFusedNewPosBuffer().capacity(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(nw * num_teams) nowait \ + is_device_ptr(new_pos_ptr,rsoa_dev_list_ptr) \ + depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])") + for (int iw = 0; iw < nw; ++iw) + for (int team_id = 0; team_id < num_teams; team_id++) + { + const auto* source_pos_ptr = rsoa_dev_list_ptr[iw]; + const size_t first = ChunkSizePerTeam * team_id; + const size_t last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); + + { // temp + auto* r_iw_ptr = r_dr_ptr + iw * stride_size; + auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded; + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = new_pos_ptr[idim * new_pos_stride + iw]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, + num_padded, iel, iat); + } + + if (prepare_old) + { // old + auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size; + auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded; + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = source_pos_ptr[idim * num_padded + iat]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int 
iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr, + num_padded, iel, iat); + r_iw_ptr[iat] = std::numeric_limits::max(); // assign a + // big number + } + } + } + + if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) + { + PRAGMA_OFFLOAD("omp target update nowait \ + depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ + from(r_dr_ptr[:mw_new_old_dist_displ.size()])") + } + } + + int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + // ensure there are neighbors + assert(this->num_targets_ > 1); + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) + { + for (int jat = 0; jat < this->num_targets_; ++jat) + if (this->temp_r_[jat] < min_dist && jat != iat) + { + min_dist = this->temp_r_[jat]; + index = jat; + } + assert(index >= 0); + dr = this->temp_dr_[index]; + } + else + { + for (int jat = 0; jat < iat; ++jat) + if (this->distances_[iat][jat] < min_dist) + { + min_dist = this->distances_[iat][jat]; + index = jat; + } + for (int jat = iat + 1; jat < this->num_targets_; ++jat) + if (this->distances_[jat][iat] < min_dist) + { + min_dist = this->distances_[jat][iat]; + index = jat; + } + assert(index != iat && index >= 0); + if (index < iat) + dr = this->displacements_[iat][index]; + else + dr = this->displacements_[index][iat]; + } + r = min_dist; + return index; + } + + /** After accepting the iat-th particle, update the iat-th row of distances_ + * and displacements_. 
Upper triangle is not needed in the later computation + * and thus not updated + */ + inline void update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + // update [0, iat) columns + const int nupdate = iat; + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n(this->temp_r_.data(), nupdate, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[iat].data(idim)); + // copy column + for (size_t i = iat + 1; i < this->num_targets_; ++i) + { + this->distances_[i][iat] = this->temp_r_[i]; + this->displacements_[i](iat) = -this->temp_dr_[i]; + } + } + + void updatePartial(IndexType jat, bool from_temp) override + { + ScopedTimer local_timer(update_timer_); + + // update [0, jat) + const int nupdate = jat; + if (from_temp) + { + // copy row + assert(nupdate <= this->temp_r_.size()); + std::copy_n(this->temp_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[jat].data(idim)); + } + else + { + assert(old_prepared_elec_id_ == jat); + // copy row + assert(nupdate <= this->old_r_.size()); + std::copy_n(this->old_r_.data(), nupdate, this->distances_[jat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->old_dr_.data(idim), nupdate, this->displacements_[jat].data(idim)); + } + } + + void mw_updatePartial(const RefVectorWithLeader>& dt_list, + IndexType jat, + const std::vector& from_temp) override + { + // if temp data on host is not updated by mw_move during p-by-p moves, + // there is no need to update distance table + if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)) + return; + + for (int iw = 0; iw < dt_list.size(); iw++) + dt_list[iw].updatePartial(jat, from_temp[iw]); + } + + void mw_finalizePbyP(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list) const override + { + // if the distance table 
is not updated by mw_move during p-by-p, needs + // to recompute the whole table before being used by Hamiltonian if + // requested + if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) && + (this->modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP)) + this->mw_evaluate(dt_list, p_list); + } + + size_t get_num_particls_stored() const override { return num_particls_stored; } + +private: + /// number of targets with padding + const size_t num_targets_padded_; +#if !defined(NDEBUG) + /** set to particle id after move() with prepare_old = true. -1 means not + * prepared. It is intended only for safety checks, not for codepath + * selection. + */ + int old_prepared_elec_id_; +#endif + /// timer for offload portion + NewTimer& offload_timer_; + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; + /// the particle count of the internal stored distances. + const size_t num_particls_stored = 64; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableAB.h b/src/Particle/SoaDistanceTableAB.h index f9b3c79cd7..c2eaf71446 100644 --- a/src/Particle/SoaDistanceTableAB.h +++ b/src/Particle/SoaDistanceTableAB.h @@ -13,136 +13,11 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AB_H #define QMCPLUSPLUS_DTDIMPL_AB_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "Utilities/FairDivide.h" -#include "Concurrency/OpenMP.h" +#include "Particle/SoaDistanceTableABT.h" namespace qmcplusplus { -/**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form - */ template -struct SoaDistanceTableAB : public DTD_BConds, public DistanceTableAB -{ - SoaDistanceTableAB(const ParticleSet& source, ParticleSet& target) - : DTD_BConds(source.getLattice()), - DistanceTableAB(source, target, DTModes::ALL_OFF), - evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") + target.getName() + "_" + source.getName(), - 
timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTAB::move_") + target.getName() + "_" + source.getName(), - timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTAB::update_") + target.getName() + "_" + source.getName(), - timer_level_fine)) - { - resize(); - } - - void resize() - { - if (num_sources_ * num_targets_ == 0) - return; - - // initialize memory containers and views - const int num_sources_padded = getAlignedSize(num_sources_); - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].resize(num_sources_padded); - displacements_[i].resize(num_sources_padded); - } - - // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function - // temp_r_ is padded explicitly while temp_dr_ is padded internally - temp_r_.resize(num_sources_padded); - temp_dr_.resize(num_sources_); - } - - SoaDistanceTableAB() = delete; - SoaDistanceTableAB(const SoaDistanceTableAB&) = delete; - - /** evaluate the full table */ - inline void evaluate(ParticleSet& P) override - { - ScopedTimer local_timer(evaluate_timer_); -#pragma omp parallel - { - int first, last; - FairDivideAligned(num_sources_, getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); - - //be aware of the sign of Displacement - for (int iat = 0; iat < num_targets_; ++iat) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], first, last); - } - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - DTD_BConds::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, - 0, num_sources_); - // If the full table is not ready all the time, overwrite the current value. 
- // If this step is missing, DT values can be undefined in case a move is rejected. - if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], 0, num_sources_); - } - - ///update the stripe for jat-th particle - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - std::copy_n(temp_r_.data(), num_sources_, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim)); - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_sources_; ++jat) - if (temp_r_[jat] < min_dist) - { - min_dist = temp_r_[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = temp_dr_[index]; - } - } - else - { - for (int jat = 0; jat < num_sources_; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = displacements_[iat][index]; - } - } - assert(index >= 0 && index < num_sources_); - return index; - } - -private: - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; +using SoaDistanceTableAB = SoaDistanceTableABT; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableABOMPTarget.h b/src/Particle/SoaDistanceTableABOMPTarget.h index 05dac1eaf1..c31b673a47 100644 --- a/src/Particle/SoaDistanceTableABOMPTarget.h +++ b/src/Particle/SoaDistanceTableABOMPTarget.h @@ -14,403 +14,15 @@ #ifndef QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H -#include "Lattice/ParticleBConds3DSoa.h" -#include "DistanceTable.h" 
-#include "OMPTarget/OMPallocator.hpp" -#include "Platforms/PinnedAllocator.h" -#include "Particle/RealSpacePositionsOMPTarget.h" -#include "ResourceCollection.h" -#include "OMPTarget/OMPTargetMath.hpp" +#include "Particle/SoaDistanceTableABTOMPTarget.h" namespace qmcplusplus { /**@ingroup nnlist - * @brief A derived classe from DistacneTableData, specialized for AB using a transposed form + * @brief A derived class from DistanceTableData, specialized for dense case */ template -class SoaDistanceTableABOMPTarget : public DTD_BConds, public DistanceTableAB -{ -private: - template - using OffloadPinnedVector = Vector>>; - - ///accelerator output buffer for r and dr - OffloadPinnedVector r_dr_memorypool_; - ///accelerator input array for a list of target particle positions, num_targets_ x D - OffloadPinnedVector target_pos; - - ///multi walker shared memory buffer - struct DTABMultiWalkerMem : public Resource - { - ///accelerator output array for multiple walkers, [1+D][num_targets_][num_padded] (distances, displacements) - OffloadPinnedVector mw_r_dr; - ///accelerator input buffer for multiple data set - OffloadPinnedVector offload_input; - - DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") {} - - DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - }; - - ResourceHandle mw_mem_handle_; - - void resize() - { - if (num_sources_ * num_targets_ == 0) - return; - if (distances_.size()) - return; - - // initialize memory containers and views - const size_t num_padded = getAlignedSize(num_sources_); - const size_t stride_size = getPerTargetPctlStrideSize(); - r_dr_memorypool_.resize(stride_size * num_targets_); - - distances_.resize(num_targets_); - displacements_.resize(num_targets_); - for (int i = 0; i < num_targets_; ++i) - { - distances_[i].attachReference(r_dr_memorypool_.data() + i * stride_size, num_sources_); - 
displacements_[i].attachReference(num_sources_, num_padded, - r_dr_memorypool_.data() + i * stride_size + num_padded); - } - } - - static void associateResource(const RefVectorWithLeader& dt_list) - { - auto& dt_leader = dt_list.getCastedLeader(); - - // initialize memory containers and views - size_t count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - count_targets += dt.targets(); - dt.r_dr_memorypool_.free(); - } - - const size_t num_sources = dt_leader.num_sources_; - const size_t num_padded = getAlignedSize(dt_leader.num_sources_); - const size_t stride_size = num_padded * (D + 1); - const size_t total_targets = count_targets; - auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr; - mw_r_dr.resize(total_targets * stride_size); - - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - assert(num_sources == dt.num_sources_); - - dt.distances_.resize(dt.targets()); - dt.displacements_.resize(dt.targets()); - - for (int i = 0; i < dt.targets(); ++i) - { - dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources); - dt.displacements_[i].attachReference(num_sources, num_padded, - mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } - } - -public: - SoaDistanceTableABOMPTarget(const ParticleSet& source, ParticleSet& target) - : DTD_BConds(source.getLattice()), - DistanceTableAB(source, target, DTModes::ALL_OFF), - offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + name_, timer_level_fine)), - evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + name_, timer_level_fine)), - move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + name_, timer_level_fine)), - update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + name_, timer_level_fine)) - - { - auto* coordinates_soa 
= dynamic_cast(&source.getCoordinates()); - if (!coordinates_soa) - throw std::runtime_error("Source particle set doesn't have OpenMP offload. Contact developers!"); - PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") - - // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function - // temp_r_ is padded explicitly while temp_dr_ is padded internally - const int num_padded = getAlignedSize(num_sources_); - temp_r_.resize(num_padded); - temp_dr_.resize(num_sources_); - } - - SoaDistanceTableABOMPTarget() = delete; - SoaDistanceTableABOMPTarget(const SoaDistanceTableABOMPTarget&) = delete; - - ~SoaDistanceTableABOMPTarget() { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - auto& dt_leader = dt_list.getCastedLeader(); - dt_leader.mw_mem_handle_ = collection.lendResource(); - associateResource(dt_list); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& dt_list) const override - { - collection.takebackResource(dt_list.getCastedLeader().mw_mem_handle_); - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - dt.distances_.clear(); - dt.displacements_.clear(); - } - } - - const T* getMultiWalkerDataPtr() const override { return mw_mem_handle_.getResource().mw_r_dr.data(); } - - size_t getPerTargetPctlStrideSize() const override { return getAlignedSize(num_sources_) * (D + 1); } - - /** evaluate the full table */ - inline void evaluate(ParticleSet& P) override - { - resize(); - - ScopedTimer local_timer(evaluate_timer_); - // be aware of the sign of Displacement - const int num_targets_local = num_targets_; - const int num_sources_local = num_sources_; - const int num_padded = 
getAlignedSize(num_sources_); - - target_pos.resize(num_targets_ * D); - for (size_t iat = 0; iat < num_targets_; iat++) - for (size_t idim = 0; idim < D; idim++) - target_pos[iat * D + idim] = P.R[iat][idim]; - - auto* target_pos_ptr = target_pos.data(); - auto* source_pos_ptr = origin_.getCoordinates().getAllParticlePos().data(); - auto* r_dr_ptr = distances_[0].data(); - assert(distances_[0].data() + num_padded == displacements_[0].data()); - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - const size_t stride_size = getPerTargetPctlStrideSize(); - - { - ScopedTimer offload(offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(num_targets_*num_teams) \ - map(to: source_pos_ptr[:num_padded*D]) \ - map(always, to: target_pos_ptr[:num_targets_*D]) \ - map(always, from: r_dr_ptr[:num_targets_*stride_size])") - for (int iat = 0; iat < num_targets_local; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - auto* r_iat_ptr = r_dr_ptr + iat * stride_size; - auto* dr_iat_ptr = r_iat_ptr + num_padded; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - } - } - - inline void mw_evaluate(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list) const override - { - assert(this == &dt_list.getLeader()); - auto& dt_leader = dt_list.getCastedLeader(); - - ScopedTimer local_timer(evaluate_timer_); - - const size_t nw = dt_list.size(); - 
DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; - auto& mw_r_dr = mw_mem.mw_r_dr; - - size_t count_targets = 0; - for (ParticleSet& p : p_list) - count_targets += p.getTotalNum(); - const size_t total_targets = count_targets; - - const int num_padded = getAlignedSize(num_sources_); - -#ifndef NDEBUG - const int stride_size = getPerTargetPctlStrideSize(); - count_targets = 0; - for (size_t iw = 0; iw < dt_list.size(); iw++) - { - auto& dt = dt_list.getCastedElement(iw); - - for (int i = 0; i < dt.targets(); ++i) - { - assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size); - assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); - } - count_targets += dt.targets(); - } -#endif - - // This is horrible optimization putting different data types in a single buffer but allows a single H2D transfer - const size_t realtype_size = sizeof(RealType); - const size_t int_size = sizeof(int); - const size_t ptr_size = sizeof(RealType*); - auto& offload_input = mw_mem.offload_input; - offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size); - auto source_ptrs = reinterpret_cast(offload_input.data()); - auto target_positions = reinterpret_cast(offload_input.data() + ptr_size * nw); - auto walker_id_ptr = - reinterpret_cast(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size); - - count_targets = 0; - for (size_t iw = 0; iw < nw; iw++) - { - auto& dt = dt_list.getCastedElement(iw); - ParticleSet& pset(p_list[iw]); - - assert(dt.targets() == pset.getTotalNum()); - assert(num_sources_ == dt.num_sources_); - - auto& RSoA_OMPTarget = static_cast(dt.origin_.getCoordinates()); - source_ptrs[iw] = const_cast(RSoA_OMPTarget.getDevicePtr()); - - for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets) - { - walker_id_ptr[count_targets] = iw; - for (size_t idim = 0; idim < D; idim++) - target_positions[count_targets * D + idim] = 
pset.R[iat][idim]; - } - } - - // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team. - const int ChunkSizePerTeam = 512; - const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; - - auto* r_dr_ptr = mw_r_dr.data(); - auto* input_ptr = offload_input.data(); - const int num_sources_local = num_sources_; - - { - ScopedTimer offload(dt_leader.offload_timer_); - PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \ - map(always, to: input_ptr[:offload_input.size()]) \ - depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait") - for (int iat = 0; iat < total_targets; ++iat) - for (int team_id = 0; team_id < num_teams; team_id++) - { - auto* target_pos_ptr = reinterpret_cast(input_ptr + ptr_size * nw); - const int walker_id = - reinterpret_cast(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat]; - auto* source_pos_ptr = reinterpret_cast(input_ptr)[walker_id]; - auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1); - auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded; - - const int first = ChunkSizePerTeam * team_id; - const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); - - T pos[D]; - for (int idim = 0; idim < D; idim++) - pos[idim] = target_pos_ptr[iat * D + idim]; - - PRAGMA_OFFLOAD("omp parallel for") - for (int iel = first; iel < last; iel++) - DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, - num_padded, iel); - } - - if (!(modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) - { - PRAGMA_OFFLOAD( - "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") - } - // wait for computing and (optional) transferring back to host. 
- // It can potentially be moved to ParticleSet to fuse multiple similar taskwait - PRAGMA_OFFLOAD("omp taskwait") - } - } - - inline void mw_recompute(const RefVectorWithLeader& dt_list, - const RefVectorWithLeader& p_list, - const std::vector& recompute) const override - { - mw_evaluate(dt_list, p_list); - } - - ///evaluate the temporary pair relations - inline void move(const ParticleSet& P, const PosType& rnew, const IndexType iat, bool prepare_old) override - { - ScopedTimer local_timer(move_timer_); - DTD_BConds::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_, - 0, num_sources_); - // If the full table is not ready all the time, overwrite the current value. - // If this step is missing, DT values can be undefined in case a move is rejected. - if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) - DTD_BConds::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(), - distances_[iat].data(), displacements_[iat], 0, num_sources_); - } - - ///update the stripe for jat-th particle - inline void update(IndexType iat) override - { - ScopedTimer local_timer(update_timer_); - std::copy_n(temp_r_.data(), num_sources_, distances_[iat].data()); - for (int idim = 0; idim < D; ++idim) - std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim)); - } - - int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override - { - RealType min_dist = std::numeric_limits::max(); - int index = -1; - if (newpos) - { - for (int jat = 0; jat < num_sources_; ++jat) - if (temp_r_[jat] < min_dist) - { - min_dist = temp_r_[jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = temp_dr_[index]; - } - } - else - { - for (int jat = 0; jat < num_sources_; ++jat) - if (distances_[iat][jat] < min_dist) - { - min_dist = distances_[iat][jat]; - index = jat; - } - if (index >= 0) - { - r = min_dist; - dr = displacements_[iat][index]; - } - } - assert(index >= 0 && 
index < num_sources_); - return index; - } +using SoaDistanceTableABOMPTarget = SoaDistanceTableABTOMPTarget; -private: - /// timer for offload portion - NewTimer& offload_timer_; - /// timer for evaluate() - NewTimer& evaluate_timer_; - /// timer for move() - NewTimer& move_timer_; - /// timer for update() - NewTimer& update_timer_; -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/SoaDistanceTableABT.h b/src/Particle/SoaDistanceTableABT.h new file mode 100644 index 0000000000..bb1265a823 --- /dev/null +++ b/src/Particle/SoaDistanceTableABT.h @@ -0,0 +1,160 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_ABT_H +#define QMCPLUSPLUS_DTDIMPL_ABT_H + +#include "Concurrency/OpenMP.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "Particle/DistanceTableT.h" +#include "Utilities/FairDivide.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived classe from DistacneTableData, specialized for AB using a + * transposed form + */ +template +struct SoaDistanceTableABT : public DTD_BConds::RealType, D, SC>, + public DistanceTableABT +{ + using RealType = typename DistanceTableABT::RealType; + using PosType = typename DistanceTableABT::PosType; + using IndexType = typename DistanceTableABT::IndexType; + + SoaDistanceTableABT(const ParticleSetT& source, ParticleSetT& target) + : DTD_BConds(source.getLattice()), + DistanceTableABT(source, target, DTModes::ALL_OFF), + evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") + target.getName() + "_" + source.getName(), + timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTAB::move_") + target.getName() + "_" + source.getName(), + timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTAB::update_") + target.getName() + "_" + source.getName(), + timer_level_fine)) + { + resize(); + } + + void resize() + { + if (this->num_sources_ * this->num_targets_ == 0) + return; + + // initialize memory containers and views + const int num_sources_padded = getAlignedSize(this->num_sources_); + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) + { + this->distances_[i].resize(num_sources_padded); + this->displacements_[i].resize(num_sources_padded); + } + + // The padding of temp_r_ and temp_dr_ is necessary for the memory copy + // in the update function temp_r_ is padded explicitly while temp_dr_ is + // padded internally + this->temp_r_.resize(num_sources_padded); + 
this->temp_dr_.resize(this->num_sources_); + } + + SoaDistanceTableABT() = delete; + SoaDistanceTableABT(const SoaDistanceTableABT&) = delete; + + /** evaluate the full table */ + inline void evaluate(ParticleSetT& P) override + { + ScopedTimer local_timer(evaluate_timer_); +#pragma omp parallel + { + int first, last; + FairDivideAligned(this->num_sources_, getAlignment(), omp_get_num_threads(), omp_get_thread_num(), + first, last); + + // be aware of the sign of Displacement + for (int iat = 0; iat < this->num_targets_; ++iat) + DTD_BConds::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], first, + last); + } + } + + /// evaluate the temporary pair relations + inline void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + DTD_BConds::computeDistances(rnew, this->origin_.getCoordinates().getAllParticlePos(), + this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_); + // If the full table is not ready all the time, overwrite the current + // value. If this step is missing, DT values can be undefined in case a + // move is rejected. 
+ if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) + DTD_BConds::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, + this->num_sources_); + } + + /// update the stripe for jat-th particle + inline void update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + std::copy_n(this->temp_r_.data(), this->num_sources_, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), this->num_sources_, this->displacements_[iat].data(idim)); + } + + int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) + { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->temp_r_[jat] < min_dist) + { + min_dist = this->temp_r_[jat]; + index = jat; + } + if (index >= 0) + { + r = min_dist; + dr = this->temp_dr_[index]; + } + } + else + { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->distances_[iat][jat] < min_dist) + { + min_dist = this->distances_[iat][jat]; + index = jat; + } + if (index >= 0) + { + r = min_dist; + dr = this->displacements_[iat][index]; + } + } + assert(index >= 0 && index < this->num_sources_); + return index; + } + +private: + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/SoaDistanceTableABTOMPTarget.h b/src/Particle/SoaDistanceTableABTOMPTarget.h new file mode 100644 index 0000000000..2447395e2a --- /dev/null +++ b/src/Particle/SoaDistanceTableABTOMPTarget.h @@ -0,0 +1,436 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. 
+// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +#ifndef QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H +#define QMCPLUSPLUS_DTDIMPL_ABT_OMPTARGET_H + +#include "DistanceTableT.h" +#include "Lattice/ParticleBConds3DSoa.h" +#include "OMPTarget/OMPTargetMath.hpp" +#include "OMPTarget/OMPallocator.hpp" +#include "Particle/RealSpacePositionsTOMPTarget.h" +#include "Platforms/PinnedAllocator.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +/**@ingroup nnlist + * @brief A derived classe from DistacneTableData, specialized for AB using a + * transposed form + */ +template +class SoaDistanceTableABTOMPTarget : public DTD_BConds::RealType, D, SC>, + public DistanceTableABT +{ +private: + template + using OffloadPinnedVector = Vector>>; + + using RealType = typename DistanceTableABT::RealType; + using PosType = typename DistanceTableABT::PosType; + using IndexType = typename DistanceTableABT::IndexType; + + /// accelerator output buffer for r and dr + OffloadPinnedVector r_dr_memorypool_; + /// accelerator input array for a list of target particle positions, + /// num_targets_ x D + OffloadPinnedVector target_pos; + + /// multi walker shared memory buffer + struct DTABMultiWalkerMem : public Resource + { + /// accelerator output array for multiple walkers, + /// [1+D][num_targets_][num_padded] (distances, displacements) + OffloadPinnedVector mw_r_dr; + /// accelerator input buffer for multiple data set + OffloadPinnedVector offload_input; + + DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") {} + + DTABMultiWalkerMem(const DTABMultiWalkerMem&) : 
DTABMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + }; + + ResourceHandle mw_mem_handle_; + + void resize() + { + if (this->num_sources_ * this->num_targets_ == 0) + return; + if (this->distances_.size()) + return; + + // initialize memory containers and views + const size_t num_padded = getAlignedSize(this->num_sources_); + const size_t stride_size = getPerTargetPctlStrideSize(); + r_dr_memorypool_.resize(stride_size * this->num_targets_); + + this->distances_.resize(this->num_targets_); + this->displacements_.resize(this->num_targets_); + for (int i = 0; i < this->num_targets_; ++i) + { + this->distances_[i].attachReference(r_dr_memorypool_.data() + i * stride_size, this->num_sources_); + this->displacements_[i].attachReference(this->num_sources_, num_padded, + r_dr_memorypool_.data() + i * stride_size + num_padded); + } + } + + static void associateResource(const RefVectorWithLeader>& dt_list) + { + auto& dt_leader = dt_list.template getCastedLeader(); + + // initialize memory containers and views + size_t count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + count_targets += dt.targets(); + dt.r_dr_memorypool_.free(); + } + + const size_t num_sources = dt_leader.num_sources_; + const size_t num_padded = getAlignedSize(dt_leader.num_sources_); + const size_t stride_size = num_padded * (D + 1); + const size_t total_targets = count_targets; + auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr; + mw_r_dr.resize(total_targets * stride_size); + + count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + assert(num_sources == dt.num_sources_); + + dt.distances_.resize(dt.targets()); + dt.displacements_.resize(dt.targets()); + + for (int i = 0; i < dt.targets(); ++i) + { + dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, 
num_sources); + dt.displacements_[i].attachReference(num_sources, num_padded, + mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); + } + count_targets += dt.targets(); + } + } + +public: + SoaDistanceTableABTOMPTarget(const ParticleSetT& source, ParticleSetT& target) + : DTD_BConds(source.getLattice()), + DistanceTableABT(source, target, DTModes::ALL_OFF), + offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + this->name_, timer_level_fine)), + evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + this->name_, timer_level_fine)), + move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + this->name_, timer_level_fine)), + update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + this->name_, timer_level_fine)) + + { + auto* coordinates_soa = dynamic_cast*>(&source.getCoordinates()); + if (!coordinates_soa) + throw std::runtime_error("Source particle set doesn't have OpenMP " + "offload. Contact developers!"); + PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])") + + // The padding of temp_r_ and temp_dr_ is necessary for the memory copy + // in the update function temp_r_ is padded explicitly while temp_dr_ is + // padded internally + const int num_padded = getAlignedSize(this->num_sources_); + this->temp_r_.resize(num_padded); + this->temp_dr_.resize(this->num_sources_); + } + + SoaDistanceTableABTOMPTarget() = delete; + SoaDistanceTableABTOMPTarget(const SoaDistanceTableABTOMPTarget&) = delete; + + ~SoaDistanceTableABTOMPTarget() { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + auto& dt_leader = dt_list.template getCastedLeader(); + dt_leader.mw_mem_handle_ = collection.lendResource(); + 
associateResource(dt_list); + } + + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& dt_list) const override + { + collection.takebackResource(dt_list.template getCastedLeader().mw_mem_handle_); + for (size_t iw = 0; iw < dt_list.size(); iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + dt.distances_.clear(); + dt.displacements_.clear(); + } + } + + const RealType* getMultiWalkerDataPtr() const override { return mw_mem_handle_.getResource().mw_r_dr.data(); } + + size_t getPerTargetPctlStrideSize() const override { return getAlignedSize(this->num_sources_) * (D + 1); } + + /** evaluate the full table */ + inline void evaluate(ParticleSetT& P) override + { + resize(); + + ScopedTimer local_timer(evaluate_timer_); + // be aware of the sign of Displacement + const int num_targets_local = this->num_targets_; + const int num_sources_local = this->num_sources_; + const int num_padded = getAlignedSize(this->num_sources_); + + target_pos.resize(this->num_targets_ * D); + for (size_t iat = 0; iat < this->num_targets_; iat++) + for (size_t idim = 0; idim < D; idim++) + target_pos[iat * D + idim] = P.R[iat][idim]; + + auto* target_pos_ptr = target_pos.data(); + auto* source_pos_ptr = this->origin_.getCoordinates().getAllParticlePos().data(); + auto* r_dr_ptr = this->distances_[0].data(); + assert(this->distances_[0].data() + num_padded == this->displacements_[0].data()); + + // To maximize thread usage, the loop over electrons is chunked. Each + // chunk is sent to an OpenMP offload thread team. 
+ const int ChunkSizePerTeam = 512; + const size_t num_teams = (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + const size_t stride_size = getPerTargetPctlStrideSize(); + + { + ScopedTimer offload(offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(this->num_targets_*num_teams) \ + map(to: source_pos_ptr[:num_padded*D]) \ + map(always, to: target_pos_ptr[:this->num_targets_*D]) \ + map(always, from: r_dr_ptr[:this->num_targets_*stride_size])") + for (int iat = 0; iat < num_targets_local; ++iat) + for (int team_id = 0; team_id < num_teams; team_id++) + { + const int first = ChunkSizePerTeam * team_id; + const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = target_pos_ptr[iat * D + idim]; + + auto* r_iat_ptr = r_dr_ptr + iat * stride_size; + auto* dr_iat_ptr = r_iat_ptr + num_padded; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, + num_padded, iel); + } + } + } + + inline void mw_evaluate(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list) const override + { + assert(this == &dt_list.getLeader()); + auto& dt_leader = dt_list.template getCastedLeader(); + + ScopedTimer local_timer(evaluate_timer_); + + const size_t nw = dt_list.size(); + DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_; + auto& mw_r_dr = mw_mem.mw_r_dr; + + size_t count_targets = 0; + for (ParticleSetT& p : p_list) + count_targets += p.getTotalNum(); + const size_t total_targets = count_targets; + + const int num_padded = getAlignedSize(this->num_sources_); + +#ifndef NDEBUG + const int stride_size = getPerTargetPctlStrideSize(); + count_targets = 0; + for (size_t iw = 0; iw < dt_list.size(); iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + + for (int i = 0; i < dt.targets(); ++i) + 
{ + assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size); + assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded); + } + count_targets += dt.targets(); + } +#endif + + // This is horrible optimization putting different data types in a + // single buffer but allows a single H2D transfer + const size_t realtype_size = sizeof(RealType); + const size_t int_size = sizeof(int); + const size_t ptr_size = sizeof(RealType*); + auto& offload_input = mw_mem.offload_input; + offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size); + auto source_ptrs = reinterpret_cast(offload_input.data()); + auto target_positions = reinterpret_cast(offload_input.data() + ptr_size * nw); + auto walker_id_ptr = + reinterpret_cast(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size); + + count_targets = 0; + for (size_t iw = 0; iw < nw; iw++) + { + auto& dt = dt_list.template getCastedElement(iw); + ParticleSetT& pset(p_list[iw]); + + assert(dt.targets() == pset.getTotalNum()); + assert(this->num_sources_ == dt.num_sources_); + + auto& RSoA_OMPTarget = static_cast&>(dt.origin_.getCoordinates()); + source_ptrs[iw] = const_cast(RSoA_OMPTarget.getDevicePtr()); + + for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets) + { + walker_id_ptr[count_targets] = iw; + for (size_t idim = 0; idim < D; idim++) + target_positions[count_targets * D + idim] = pset.R[iat][idim]; + } + } + + // To maximize thread usage, the loop over electrons is chunked. Each + // chunk is sent to an OpenMP offload thread team. 
+ const int ChunkSizePerTeam = 512; + const size_t num_teams = (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; + + auto* r_dr_ptr = mw_r_dr.data(); + auto* input_ptr = offload_input.data(); + const int num_sources_local = this->num_sources_; + + { + ScopedTimer offload(dt_leader.offload_timer_); + PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \ + num_teams(total_targets*num_teams) \ + map(always, to: input_ptr[:offload_input.size()]) \ + depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait") + for (int iat = 0; iat < total_targets; ++iat) + for (int team_id = 0; team_id < num_teams; team_id++) + { + auto* target_pos_ptr = reinterpret_cast(input_ptr + ptr_size * nw); + const int walker_id = + reinterpret_cast(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat]; + auto* source_pos_ptr = reinterpret_cast(input_ptr)[walker_id]; + auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1); + auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded; + + const int first = ChunkSizePerTeam * team_id; + const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local); + + RealType pos[D]; + for (int idim = 0; idim < D; idim++) + pos[idim] = target_pos_ptr[iat * D + idim]; + + PRAGMA_OFFLOAD("omp parallel for") + for (int iel = first; iel < last; iel++) + DTD_BConds::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr, + num_padded, iel); + } + + if (!(this->modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) + { + PRAGMA_OFFLOAD("omp target update from(r_dr_ptr[:mw_r_dr.size()]) \ + depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait") + } + // wait for computing and (optional) transferring back to host. 
+ // It can potentially be moved to ParticleSet to fuse multiple + // similar taskwait + PRAGMA_OFFLOAD("omp taskwait") + } + } + + inline void mw_recompute(const RefVectorWithLeader>& dt_list, + const RefVectorWithLeader>& p_list, + const std::vector& recompute) const override + { + mw_evaluate(dt_list, p_list); + } + + /// evaluate the temporary pair relations + inline void move(const ParticleSetT& P, const PosType& rnew, const IndexType iat, bool prepare_old) override + { + ScopedTimer local_timer(move_timer_); + DTD_BConds::computeDistances(rnew, this->origin_.getCoordinates().getAllParticlePos(), + this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_); + // If the full table is not ready all the time, overwrite the current + // value. If this step is missing, DT values can be undefined in case a + // move is rejected. + if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old) + DTD_BConds::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(), + this->distances_[iat].data(), this->displacements_[iat], 0, + this->num_sources_); + } + + /// update the stripe for jat-th particle + inline void update(IndexType iat) override + { + ScopedTimer local_timer(update_timer_); + std::copy_n(this->temp_r_.data(), this->num_sources_, this->distances_[iat].data()); + for (int idim = 0; idim < D; ++idim) + std::copy_n(this->temp_dr_.data(idim), this->num_sources_, this->displacements_[iat].data(idim)); + } + + int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override + { + RealType min_dist = std::numeric_limits::max(); + int index = -1; + if (newpos) + { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->temp_r_[jat] < min_dist) + { + min_dist = this->temp_r_[jat]; + index = jat; + } + if (index >= 0) + { + r = min_dist; + dr = this->temp_dr_[index]; + } + } + else + { + for (int jat = 0; jat < this->num_sources_; ++jat) + if (this->distances_[iat][jat] < min_dist) + { + min_dist = 
this->distances_[iat][jat]; + index = jat; + } + if (index >= 0) + { + r = min_dist; + dr = this->displacements_[iat][index]; + } + } + assert(index >= 0 && index < this->num_sources_); + return index; + } + +private: + /// timer for offload portion + NewTimer& offload_timer_; + /// timer for evaluate() + NewTimer& evaluate_timer_; + /// timer for move() + NewTimer& move_timer_; + /// timer for update() + NewTimer& update_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/VirtualParticleSet.cpp b/src/Particle/VirtualParticleSet.cpp deleted file mode 100644 index 7f3b4b9940..0000000000 --- a/src/Particle/VirtualParticleSet.cpp +++ /dev/null @@ -1,271 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2021 QMCPACK developers. -// -// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -/** @file VirtualParticleSet.cpp - * A proxy class to the quantum ParticleSet - */ - -#include "Configuration.h" -#include "VirtualParticleSet.h" -#include "Particle/DistanceTable.h" -#include "Particle/createDistanceTable.h" -#include "QMCHamiltonians/NLPPJob.h" -#include "ResourceCollection.h" - -namespace qmcplusplus -{ - -struct VPMultiWalkerMem : public Resource -{ - /// multi walker reference particle - Vector> mw_refPctls; - - VPMultiWalkerMem() : Resource("VPMultiWalkerMem") {} - - VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } -}; - 
-VirtualParticleSet::VirtualParticleSet(const ParticleSet& p, int nptcl, size_t dt_count_limit) - : ParticleSet(p.getSimulationCell()) -{ - setName("virtual"); - - //initialize local data structure - setSpinor(p.isSpinor()); - TotalNum = nptcl; - R.resize(nptcl); - if (isSpinor()) - spins.resize(nptcl); - coordinates_->resize(nptcl); - - //create distancetables - assert(dt_count_limit <= p.getNumDistTables()); - if (dt_count_limit == 0) - dt_count_limit = p.getNumDistTables(); - for (int i = 0; i < dt_count_limit; ++i) - if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST) - addTable(p.getDistTable(i).get_origin()); - else - addTable(p.getDistTable(i).get_origin(), DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST); -} - -VirtualParticleSet::~VirtualParticleSet() = default; - -Vector>& VirtualParticleSet::getMultiWalkerRefPctls() -{ - return mw_mem_handle_.getResource().mw_refPctls; -} - -const Vector>& VirtualParticleSet::getMultiWalkerRefPctls() const -{ - return mw_mem_handle_.getResource().mw_refPctls; -} - -void VirtualParticleSet::createResource(ResourceCollection& collection) const -{ - collection.addResource(std::make_unique()); - ParticleSet::createResource(collection); -} - -void VirtualParticleSet::acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& vp_list) -{ - auto& vp_leader = vp_list.getLeader(); - vp_leader.mw_mem_handle_ = collection.lendResource(); - - auto p_list = RefVectorWithLeaderParticleSet(vp_list); - ParticleSet::acquireResource(collection, p_list); -} - -void VirtualParticleSet::releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& vp_list) -{ - collection.takebackResource(vp_list.getLeader().mw_mem_handle_); - auto p_list = RefVectorWithLeaderParticleSet(vp_list); - ParticleSet::releaseResource(collection, p_list); -} - - -const RefVectorWithLeader VirtualParticleSet::extractDTRefList( - const RefVectorWithLeader& vp_list, - int id) -{ - RefVectorWithLeader 
dt_list(vp_list.getLeader().getDistTableAB(id)); - dt_list.reserve(vp_list.size()); - for (const VirtualParticleSet& vp : vp_list) - { - const auto& d_table = vp.getDistTableAB(id); - dt_list.push_back(d_table); - } - return dt_list; -} - - -const std::vector VirtualParticleSet::extractVPCoords( - const RefVectorWithLeader& vp_list) -{ - std::vector coords_list; - for (const VirtualParticleSet& vp : vp_list) - for (int iat = 0; iat < vp.getTotalNum(); iat++) - coords_list.push_back(vp.R[iat]); - - return coords_list; -} - - -/// move virtual particles to new postions and update distance tables -void VirtualParticleSet::makeMoves(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - bool sphere, - int iat) -{ - if (sphere && iat < 0) - throw std::runtime_error( - "VirtualParticleSet::makeMoves is invoked incorrectly, the flag sphere=true requires iat specified!"); - onSphere = sphere; - refPS = refp; - refPtcl = jel; - refSourcePtcl = iat; - assert(R.size() == deltaV.size()); - for (size_t ivp = 0; ivp < R.size(); ivp++) - R[ivp] = refp.R[jel] + deltaV[ivp]; - if (refp.isSpinor()) - for (size_t ivp = 0; ivp < R.size(); ivp++) - spins[ivp] = refp.spins[jel]; //no spin deltas in this API - update(); -} - -/// move virtual particles to new postions and update distance tables -void VirtualParticleSet::makeMovesWithSpin(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - const std::vector& deltaS, - bool sphere, - int iat) -{ - assert(refp.isSpinor()); - if (sphere && iat < 0) - throw std::runtime_error( - "VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the flag sphere=true requires iat specified!"); - onSphere = sphere; - refPS = refp; - refPtcl = jel; - refSourcePtcl = iat; - assert(R.size() == deltaV.size()); - assert(spins.size() == deltaS.size()); - for (size_t ivp = 0; ivp < R.size(); ivp++) - { - R[ivp] = refp.R[jel] + deltaV[ivp]; - spins[ivp] = refp.spins[jel] + deltaS[ivp]; - } - update(); -} - -void 
VirtualParticleSet::mw_makeMoves(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& refp_list, - const RefVector>& deltaV_list, - const RefVector>& joblist, - bool sphere) -{ - auto& vp_leader = vp_list.getLeader(); - vp_leader.onSphere = sphere; - vp_leader.refPS = refp_list.getLeader(); - - const size_t nVPs = countVPs(vp_list); - auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); - mw_refPctls.resize(nVPs); - - RefVectorWithLeader p_list(vp_leader); - p_list.reserve(vp_list.size()); - - size_t ivp = 0; - for (int iw = 0; iw < vp_list.size(); iw++) - { - VirtualParticleSet& vp(vp_list[iw]); - const std::vector& deltaV(deltaV_list[iw]); - const NLPPJob& job(joblist[iw]); - - vp.onSphere = sphere; - vp.refPS = refp_list[iw]; - vp.refPtcl = job.electron_id; - vp.refSourcePtcl = job.ion_id; - assert(vp.R.size() == deltaV.size()); - for (size_t k = 0; k < vp.R.size(); k++, ivp++) - { - vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; - if (vp_leader.isSpinor()) - vp.spins[k] = refp_list[iw].spins[vp.refPtcl]; //no spin deltas in this API - mw_refPctls[ivp] = vp.refPtcl; - } - p_list.push_back(vp); - } - assert(ivp == nVPs); - - mw_refPctls.updateTo(); - ParticleSet::mw_update(p_list); -} - -void VirtualParticleSet::mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& refp_list, - const RefVector>& deltaV_list, - const RefVector>& deltaS_list, - const RefVector>& joblist, - bool sphere) -{ - auto& vp_leader = vp_list.getLeader(); - if (!vp_leader.isSpinor()) - throw std::runtime_error( - "VirtualParticleSet::mw_makeMovesWithSpin should not be called if particle sets aren't spionor types"); - vp_leader.onSphere = sphere; - vp_leader.refPS = refp_list.getLeader(); - - const size_t nVPs = countVPs(vp_list); - auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); - mw_refPctls.resize(nVPs); - - RefVectorWithLeader p_list(vp_leader); - p_list.reserve(vp_list.size()); - - size_t ivp = 0; - for (int iw = 0; iw < 
vp_list.size(); iw++) - { - VirtualParticleSet& vp(vp_list[iw]); - const std::vector& deltaV(deltaV_list[iw]); - const std::vector& deltaS(deltaS_list[iw]); - const NLPPJob& job(joblist[iw]); - - vp.onSphere = sphere; - vp.refPS = refp_list[iw]; - vp.refPtcl = job.electron_id; - vp.refSourcePtcl = job.ion_id; - assert(vp.R.size() == deltaV.size()); - assert(vp.spins.size() == deltaS.size()); - assert(vp.R.size() == vp.spins.size()); - for (size_t k = 0; k < vp.R.size(); k++, ivp++) - { - vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; - vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k]; - mw_refPctls[ivp] = vp.refPtcl; - } - p_list.push_back(vp); - } - assert(ivp == nVPs); - - mw_refPctls.updateTo(); - ParticleSet::mw_update(p_list); -} - -} // namespace qmcplusplus diff --git a/src/Particle/VirtualParticleSet.h b/src/Particle/VirtualParticleSet.h index 61696e836e..552534efb0 100644 --- a/src/Particle/VirtualParticleSet.h +++ b/src/Particle/VirtualParticleSet.h @@ -18,146 +18,11 @@ #define QMCPLUSPLUS_VIRTUAL_PARTICLESET_H #include "Configuration.h" -#include "Particle/ParticleSet.h" -#include -#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "VirtualParticleSetT.h" namespace qmcplusplus { -// forward declaration. -class NonLocalECPComponent; -template -struct NLPPJob; -struct VPMultiWalkerMem; +using VirtualParticleSet = VirtualParticleSetT; -/** A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet - * Virtual moves are defined as moves being proposed but will never be accepted. - * VirtualParticleSet is introduced to avoid changing any internal states of the physical ParticleSet. - * For this reason, the physical ParticleSet is always marked const. - * It is heavily used by non-local PP evaluations. 
- */ -class VirtualParticleSet : public ParticleSet -{ -private: - /// true, if virtual particles are on a sphere for NLPP - bool onSphere; - /// multi walker resource - ResourceHandle mw_mem_handle_; - - Vector>& getMultiWalkerRefPctls(); - - /// ParticleSet this object refers to after makeMoves - std::optional> refPS; - -public: - /// Reference particle - int refPtcl; - /// Reference source particle, used when onSphere=true - int refSourcePtcl; - - /// ParticleSet this object refers to - const ParticleSet& getRefPS() const { return refPS.value(); } - - inline bool isOnSphere() const { return onSphere; } - - const Vector>& getMultiWalkerRefPctls() const; - - /** constructor - * @param p ParticleSet whose virtual moves are handled by this object - * @param nptcl number of virtual particles - * @param dt_count_limit distance tables corresepond to [0, dt_count_limit) of the reference particle set are created - */ - VirtualParticleSet(const ParticleSet& p, int nptcl, size_t dt_count_limit = 0); - - ~VirtualParticleSet(); - - /// initialize a shared resource and hand it to a collection - void createResource(ResourceCollection& collection) const; - /** acquire external resource and assocaite it with the list of ParticleSet - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); - /** release external resource - * Note: use RAII ResourceCollectionTeamLock whenever possible - */ - static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); - - /** move virtual particles to new postions and update distance tables - * @param refp reference particle set - * @param jel reference particle that all the VP moves from - * @param deltaV Position delta for virtual moves. 
- * @param sphere set true if VP are on a sphere around the reference source particle - * @param iat reference source particle - */ - void makeMoves(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - bool sphere = false, - int iat = -1); - - inline size_t getTotalNum() const { return TotalNum; } - /**Extract list of Distance Tables - */ - static const RefVectorWithLeader extractDTRefList( - const RefVectorWithLeader& vp_list, - int id); - /**Extract list of VP coordinates, flattened over all walkers - */ - static const std::vector extractVPCoords( - const RefVectorWithLeader& vp_list); - /** move virtual particles to new postions and update distance tables - * @param refp reference particle set - * @param jel reference particle that all the VP moves from - * @param deltaV Position delta for virtual moves. - * @param deltaS Spin delta for virtual moves. - * @param sphere set true if VP are on a sphere around the reference source particle - * @param iat reference source particle - */ - void makeMovesWithSpin(const ParticleSet& refp, - int jel, - const std::vector& deltaV, - const std::vector& deltaS, - bool sphere = false, - int iat = -1); - - static void mw_makeMoves(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& p_list, - const RefVector>& deltaV_list, - const RefVector>& joblist, - bool sphere); - - static void mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, - const RefVectorWithLeader& p_list, - const RefVector>& deltaV_list, - const RefVector>& deltaS_list, - const RefVector>& joblist, - bool sphere); - - static RefVectorWithLeader RefVectorWithLeaderParticleSet( - const RefVectorWithLeader& vp_list) - { - RefVectorWithLeader ref_list(vp_list.getLeader()); - ref_list.reserve(ref_list.size()); - for (VirtualParticleSet& vp : vp_list) - ref_list.push_back(vp); - return ref_list; - } - - static size_t countVPs(const RefVectorWithLeader& vp_list) - { - size_t nVPs = 0; - for (const VirtualParticleSet& vp : vp_list) - nVPs 
+= vp.getTotalNum(); - return nVPs; - } - - static size_t countVPs(const RefVectorWithLeader& vp_list) - { - size_t nVPs = 0; - for (const VirtualParticleSet& vp : vp_list) - nVPs += vp.getTotalNum(); - return nVPs; - } -}; } // namespace qmcplusplus #endif diff --git a/src/Particle/VirtualParticleSetT.cpp b/src/Particle/VirtualParticleSetT.cpp new file mode 100644 index 0000000000..126d61611b --- /dev/null +++ b/src/Particle/VirtualParticleSetT.cpp @@ -0,0 +1,288 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file VirtualParticleSet.cpp + * A proxy class to the quantum ParticleSet + */ + +#include "VirtualParticleSetT.h" + +#include "Particle/DistanceTableT.h" +#include "Particle/createDistanceTableT.h" +#include "QMCHamiltonians/NLPPJob.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ + +struct VPMultiWalkerMem : public Resource +{ + /// multi walker reference particle + Vector> mw_refPctls; + + VPMultiWalkerMem() : Resource("VPMultiWalkerMem") {} + + VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } +}; + +template +VirtualParticleSetT::VirtualParticleSetT(const ParticleSetT& p, int nptcl, size_t dt_count_limit) + : ParticleSetT(p.getSimulationCell()) +{ + this->setName("virtual"); + + // initialize local data structure + this->setSpinor(p.isSpinor()); + 
this->TotalNum = nptcl; + this->R.resize(nptcl); + if (this->isSpinor()) + this->spins.resize(nptcl); + this->coordinates_->resize(nptcl); + + // create distancetables + assert(dt_count_limit <= p.getNumDistTables()); + if (dt_count_limit == 0) + dt_count_limit = p.getNumDistTables(); + for (int i = 0; i < dt_count_limit; ++i) + if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST) + this->addTable(p.getDistTable(i).get_origin()); + else + this->addTable(p.getDistTable(i).get_origin(), DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST); +} + +template +VirtualParticleSetT::~VirtualParticleSetT() = default; + +template +Vector>& VirtualParticleSetT::getMultiWalkerRefPctls() +{ + return mw_mem_handle_.getResource().mw_refPctls; +} + +template +const Vector>& VirtualParticleSetT::getMultiWalkerRefPctls() const +{ + return mw_mem_handle_.getResource().mw_refPctls; +} + +template +void VirtualParticleSetT::createResource(ResourceCollection& collection) const +{ + collection.addResource(std::make_unique()); + ParticleSetT::createResource(collection); +} + +template +void VirtualParticleSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list) +{ + auto& vp_leader = vp_list.getLeader(); + vp_leader.mw_mem_handle_ = collection.lendResource(); + + auto p_list = RefVectorWithLeaderParticleSet(vp_list); + ParticleSetT::acquireResource(collection, p_list); +} + +template +void VirtualParticleSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& vp_list) +{ + collection.takebackResource(vp_list.getLeader().mw_mem_handle_); + auto p_list = RefVectorWithLeaderParticleSet(vp_list); + ParticleSetT::releaseResource(collection, p_list); +} + +template +const RefVectorWithLeader> VirtualParticleSetT::extractDTRefList( + const RefVectorWithLeader>& vp_list, + int id) +{ + RefVectorWithLeader> dt_list(vp_list.getLeader().getDistTableAB(id)); + dt_list.reserve(vp_list.size()); + for (const VirtualParticleSetT& 
vp : vp_list) + { + const auto& d_table = vp.getDistTableAB(id); + dt_list.push_back(d_table); + } + return dt_list; +} + +template +const std::vector::PosType> VirtualParticleSetT::extractVPCoords( + const RefVectorWithLeader>& vp_list) +{ + std::vector coords_list; + for (const VirtualParticleSetT& vp : vp_list) + for (int iat = 0; iat < vp.getTotalNum(); iat++) + coords_list.push_back(vp.R[iat]); + + return coords_list; +} + +/// move virtual particles to new postions and update distance tables +template +void VirtualParticleSetT::makeMoves(const ParticleSetT& refp, + int jel, + const std::vector& deltaV, + bool sphere, + int iat) +{ + if (sphere && iat < 0) + throw std::runtime_error("VirtualParticleSet::makeMoves is invoked incorrectly, the flag " + "sphere=true requires iat specified!"); + onSphere = sphere; + refPS = refp; + refPtcl = jel; + refSourcePtcl = iat; + assert(this->R.size() == deltaV.size()); + for (size_t ivp = 0; ivp < this->R.size(); ivp++) + this->R[ivp] = refp.R[jel] + deltaV[ivp]; + if (refp.isSpinor()) + for (size_t ivp = 0; ivp < this->R.size(); ivp++) + this->spins[ivp] = refp.spins[jel]; // no spin deltas in this API + this->update(); +} + +/// move virtual particles to new postions and update distance tables +template +void VirtualParticleSetT::makeMovesWithSpin(const ParticleSetT& refp, + int jel, + const std::vector& deltaV, + const std::vector& deltaS, + bool sphere, + int iat) +{ + assert(refp.isSpinor()); + if (sphere && iat < 0) + throw std::runtime_error("VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the " + "flag sphere=true requires iat specified!"); + onSphere = sphere; + refPS = refp; + refPtcl = jel; + refSourcePtcl = iat; + assert(this->R.size() == deltaV.size()); + assert(this->spins.size() == deltaS.size()); + for (size_t ivp = 0; ivp < this->R.size(); ivp++) + { + this->R[ivp] = refp.R[jel] + deltaV[ivp]; + this->spins[ivp] = refp.spins[jel] + deltaS[ivp]; + } + this->update(); +} + +template +void 
VirtualParticleSetT::mw_makeMoves(const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& refp_list, + const RefVector>& deltaV_list, + const RefVector>& joblist, + bool sphere) +{ + auto& vp_leader = vp_list.getLeader(); + vp_leader.onSphere = sphere; + vp_leader.refPS = refp_list.getLeader(); + + const size_t nVPs = countVPs(vp_list); + auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); + mw_refPctls.resize(nVPs); + + RefVectorWithLeader> p_list(vp_leader); + p_list.reserve(vp_list.size()); + + size_t ivp = 0; + for (int iw = 0; iw < vp_list.size(); iw++) + { + VirtualParticleSetT& vp(vp_list[iw]); + const std::vector& deltaV(deltaV_list[iw]); + const NLPPJob& job(joblist[iw]); + + vp.onSphere = sphere; + vp.refPS = refp_list[iw]; + vp.refPtcl = job.electron_id; + vp.refSourcePtcl = job.ion_id; + assert(vp.R.size() == deltaV.size()); + for (size_t k = 0; k < vp.R.size(); k++, ivp++) + { + vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; + if (vp_leader.isSpinor()) + vp.spins[k] = refp_list[iw].spins[vp.refPtcl]; // no spin deltas in this API + mw_refPctls[ivp] = vp.refPtcl; + } + p_list.push_back(vp); + } + assert(ivp == nVPs); + + mw_refPctls.updateTo(); + ParticleSetT::mw_update(p_list); +} + +template +void VirtualParticleSetT::mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& refp_list, + const RefVector>& deltaV_list, + const RefVector>& deltaS_list, + const RefVector>& joblist, + bool sphere) +{ + auto& vp_leader = vp_list.getLeader(); + if (!vp_leader.isSpinor()) + throw std::runtime_error("VirtualParticleSet::mw_makeMovesWithSpin should not be called if " + "particle sets aren't spionor types"); + vp_leader.onSphere = sphere; + vp_leader.refPS = refp_list.getLeader(); + + const size_t nVPs = countVPs(vp_list); + auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls(); + mw_refPctls.resize(nVPs); + + RefVectorWithLeader> p_list(vp_leader); + p_list.reserve(vp_list.size()); + + size_t ivp = 0; + for (int 
iw = 0; iw < vp_list.size(); iw++) + { + VirtualParticleSetT& vp(vp_list[iw]); + const std::vector& deltaV(deltaV_list[iw]); + const std::vector& deltaS(deltaS_list[iw]); + const NLPPJob& job(joblist[iw]); + + vp.onSphere = sphere; + vp.refPS = refp_list[iw]; + vp.refPtcl = job.electron_id; + vp.refSourcePtcl = job.ion_id; + assert(vp.R.size() == deltaV.size()); + assert(vp.spins.size() == deltaS.size()); + assert(vp.R.size() == vp.spins.size()); + for (size_t k = 0; k < vp.R.size(); k++, ivp++) + { + vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k]; + vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k]; + mw_refPctls[ivp] = vp.refPtcl; + } + p_list.push_back(vp); + } + assert(ivp == nVPs); + + mw_refPctls.updateTo(); + ParticleSetT::mw_update(p_list); +} + +#ifndef QMC_COMPLEX +template class VirtualParticleSetT; +template class VirtualParticleSetT; +#else +template class VirtualParticleSetT>; +template class VirtualParticleSetT>; +#endif +} // namespace qmcplusplus diff --git a/src/Particle/VirtualParticleSetT.h b/src/Particle/VirtualParticleSetT.h new file mode 100644 index 0000000000..d706bd89ae --- /dev/null +++ b/src/Particle/VirtualParticleSetT.h @@ -0,0 +1,166 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H +#define QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Particle/ParticleSet.h" +#include + +namespace qmcplusplus +{ +// forward declaration. +class NonLocalECPComponent; +template +struct NLPPJob; +struct VPMultiWalkerMem; + +/** A ParticleSet that handles virtual moves of a selected particle of a given + * physical ParticleSet Virtual moves are defined as moves being proposed but + * will never be accepted. VirtualParticleSet is introduced to avoid changing + * any internal states of the physical ParticleSet. For this reason, the + * physical ParticleSet is always marked const. It is heavily used by non-local + * PP evaluations. 
+ */ +template +class VirtualParticleSetT : public ParticleSetT +{ +public: + using RealType = typename ParticleSetT::RealType; + using PosType = typename ParticleSetT::PosType; + +private: + /// true, if virtual particles are on a sphere for NLPP; false until a makeMoves* call sets it + bool onSphere = false; + /// multi walker resource + ResourceHandle mw_mem_handle_; + + Vector>& getMultiWalkerRefPctls(); + + /// ParticleSet this object refers to after makeMoves; empty until the first move + std::optional>> refPS; + +public: + /// Reference particle; -1 until a makeMoves* call sets it + int refPtcl = -1; + /// Reference source particle, used when onSphere=true; -1 when unset + int refSourcePtcl = -1; + + /// ParticleSet this object refers to; refPS.value() throws if no move has been made yet + const ParticleSetT& getRefPS() const { return refPS.value(); } + + inline bool isOnSphere() const { return onSphere; } + + const Vector>& getMultiWalkerRefPctls() const; + + /** constructor + * @param p ParticleSet whose virtual moves are handled by this object + * @param nptcl number of virtual particles + * @param dt_count_limit distance tables corresponding to [0, dt_count_limit) + * of the reference particle set are created + */ + VirtualParticleSetT(const ParticleSetT& p, int nptcl, size_t dt_count_limit = 0); + + ~VirtualParticleSetT(); + + /// initialize a shared resource and hand it to a collection + void createResource(ResourceCollection& collection) const; + /** acquire external resource and associate it with the list of ParticleSet + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); + /** release external resource + * Note: use RAII ResourceCollectionTeamLock whenever possible + */ + static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& vp_list); + + /** move virtual particles to new positions and update distance tables + * @param refp reference particle set + * @param jel reference particle that all the VP moves from + * @param deltaV Position delta for virtual moves.
+ * @param sphere set true if VP are on a sphere around the reference source + * particle + * @param iat reference source particle + */ + void makeMoves(const ParticleSetT& refp, + int jel, + const std::vector& deltaV, + bool sphere = false, + int iat = -1); + + static const RefVectorWithLeader> extractDTRefList( + const RefVectorWithLeader>& vp_list, + int id); + + /**Extract list of VP coordinates, flattened over all walkers + */ + static const std::vector extractVPCoords( + const RefVectorWithLeader>& vp_list); + + /** move virtual particles to new positions and update distance tables + * @param refp reference particle set + * @param jel reference particle that all the VP moves from + * @param deltaV Position delta for virtual moves. + * @param deltaS Spin delta for virtual moves. + * @param sphere set true if VP are on a sphere around the reference source + * particle + * @param iat reference source particle + */ + void makeMovesWithSpin(const ParticleSetT& refp, + int jel, + const std::vector& deltaV, + const std::vector& deltaS, + bool sphere = false, + int iat = -1); + + static void mw_makeMoves(const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& p_list, + const RefVector>& deltaV_list, + const RefVector>& joblist, + bool sphere); + + static void mw_makeMovesWithSpin(const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& p_list, + const RefVector>& deltaV_list, + const RefVector>& deltaS_list, + const RefVector>& joblist, + bool sphere); + + static RefVectorWithLeader> RefVectorWithLeaderParticleSet( + const RefVectorWithLeader& vp_list) + { + RefVectorWithLeader> ref_list(vp_list.getLeader()); + ref_list.reserve(vp_list.size()); // size of the input list; ref_list itself is still empty here + for (VirtualParticleSetT& vp : vp_list) + ref_list.push_back(vp); + return ref_list; + } + + static size_t countVPs(const RefVectorWithLeader& vp_list) + { + size_t nVPs = 0; + for (const VirtualParticleSetT& vp : vp_list) + nVPs += vp.getTotalNum(); + return nVPs; + } + + static size_t countVPs(const
RefVectorWithLeader& vp_list) + { + size_t nVPs = 0; + for (const VirtualParticleSetT& vp : vp_list) + nVPs += vp.getTotalNum(); + return nVPs; + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/WalkerConfigurations.h b/src/Particle/WalkerConfigurations.h index 1d150a2232..da0fe17853 100644 --- a/src/Particle/WalkerConfigurations.h +++ b/src/Particle/WalkerConfigurations.h @@ -23,168 +23,10 @@ #ifndef QMCPLUSPLUS_WALKERCONFIGURATIONS_H #define QMCPLUSPLUS_WALKERCONFIGURATIONS_H #include "Configuration.h" -#include "Particle/Walker.h" -#include "Utilities/IteratorUtility.h" +#include "Particle/WalkerConfigurationsT.h" namespace qmcplusplus { -/** Monte Carlo Data of an ensemble - * - * The quantities are shared by all the nodes in a group - * - NumSamples number of samples - * - Weight total weight of a sample - * - Energy average energy of a sample - * - Variance variance - * - LivingFraction fraction of walkers alive each step. - */ -template -struct MCDataType -{ - T NumSamples; - T RNSamples; - T Weight; - T Energy; - T AlternateEnergy; - T Variance; - T R2Accepted; - T R2Proposed; - T LivingFraction; -}; - -/** A set of light weight walkers that are carried between driver sections and restart - */ -class WalkerConfigurations -{ -public: - /// walker type - using Walker_t = Walker; - using FullPrecRealType = QMCTraits::FullPrecRealType; - ///container type of Walkers - using walker_list__t = std::vector>; - /// FIX: a type alias of iterator for an object should not be for just one of many objects it holds. 
- using iterator = walker_list__t::iterator; - ///const_iterator of Walker container - using const_iterator = walker_list__t::const_iterator; - - MCDataType EnsembleProperty; - - WalkerConfigurations(); - ~WalkerConfigurations(); - WalkerConfigurations(const WalkerConfigurations&) = delete; - WalkerConfigurations& operator=(const WalkerConfigurations&) = delete; - WalkerConfigurations(WalkerConfigurations&&) = default; - WalkerConfigurations& operator=(WalkerConfigurations&&) = default; - - /** create numWalkers Walkers - * - * Append Walkers to walker_list_. - */ - void createWalkers(int numWalkers, size_t numPtcls); - /** create walkers - * @param first walker iterator - * @param last walker iterator - */ - void createWalkers(iterator first, iterator last); - /** copy walkers - * @param first input walker iterator - * @param last input walker iterator - * @param start first target iterator - * - * No memory allocation is allowed. - */ - void copyWalkers(iterator first, iterator last, iterator start); - - /** destroy Walkers from itstart to itend - *@param first starting iterator of the walkers - *@param last ending iterator of the walkers - */ - iterator destroyWalkers(iterator first, iterator last); - - /** destroy Walkers - *@param nw number of walkers to be destroyed - */ - void destroyWalkers(int nw); - - ///clean up the walker list and make a new list - void resize(int numWalkers, size_t numPtcls); - - ///return the number of active walkers - inline size_t getActiveWalkers() const { return walker_list_.size(); } - ///return the total number of active walkers among a MPI group - inline size_t getGlobalNumWalkers() const { return walker_offsets_.empty() ? 
0 : walker_offsets_.back(); } - ///return the total number of active walkers among a MPI group - - inline void setWalkerOffsets(const std::vector& o) { walker_offsets_ = o; } - inline const std::vector& getWalkerOffsets() const { return walker_offsets_; } - - /// return the first iterator - inline iterator begin() { return walker_list_.begin(); } - /// return the last iterator, [begin(), end()) - inline iterator end() { return walker_list_.end(); } - - /// return the first const_iterator - inline const_iterator begin() const { return walker_list_.begin(); } - - /// return the last const_iterator [begin(), end()) - inline const_iterator end() const { return walker_list_.end(); } - /**@}*/ - - /** clear the walker_list_ without destroying them - * - * Provide std::vector::clear interface - */ - inline void clear() { walker_list_.clear(); } - - /** insert elements - * @param it locator where the inserting begins - * @param first starting iterator - * @param last ending iterator - * - * Provide std::vector::insert interface - */ - template - inline void insert(iterator it, INPUT_ITER first, INPUT_ITER last) - { - walker_list_.insert(it, first, last); - } - - /** add Walker_t* at the end - * @param awalker pointer to a walker - * - * Provide std::vector::push_back interface - */ - inline void push_back(std::unique_ptr awalker) { walker_list_.push_back(std::move(awalker)); } - - /** delete the last Walker_t* - * - * Provide std::vector::pop_back interface - */ - inline void pop_back() { walker_list_.pop_back(); } - - inline Walker_t* operator[](int i) { return walker_list_[i].get(); } - - inline const Walker_t* operator[](int i) const { return walker_list_[i].get(); } - - /** reset the Walkers - */ - void reset(); - - ///save the particle positions of all the walkers into target - void putConfigurations(Walker_t::RealType* target, QMCTraits::FullPrecRealType* weights) const; - -protected: - ///a collection of walkers - walker_list__t walker_list_; - -private: - /** 
starting index of the walkers in a processor group - * - * walker_offsets_[0]=0 and walker_offsets_[walker_offsets_.size()-1]=total number of walkers in a group - * walker_offsets_[processorid+1]-walker_offsets_[processorid] is equal to the number of walkers on a processor, - * i.e., W.getActiveWalkers(). - * walker_offsets_ is added to handle parallel I/O with hdf5 - */ - std::vector walker_offsets_; -}; +using WalkerConfigurations = WalkerConfigurationsT; } // namespace qmcplusplus #endif diff --git a/src/Particle/WalkerConfigurations.cpp b/src/Particle/WalkerConfigurationsT.cpp similarity index 70% rename from src/Particle/WalkerConfigurations.cpp rename to src/Particle/WalkerConfigurationsT.cpp index a3959d1610..25bb3ca926 100644 --- a/src/Particle/WalkerConfigurations.cpp +++ b/src/Particle/WalkerConfigurationsT.cpp @@ -16,20 +16,27 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "WalkerConfigurationsT.h" -#include "WalkerConfigurations.h" -#include #include "Utilities/IteratorUtility.h" +#include "Platforms/Host/OutputManager.h" + +#include namespace qmcplusplus { -WalkerConfigurations::WalkerConfigurations() = default; - -///default destructor -WalkerConfigurations::~WalkerConfigurations() { destroyWalkers(walker_list_.begin(), walker_list_.end()); } +template +WalkerConfigurationsT::WalkerConfigurationsT() = default; +/// default destructor +template +WalkerConfigurationsT::~WalkerConfigurationsT() +{ + destroyWalkers(walker_list_.begin(), walker_list_.end()); +} -void WalkerConfigurations::createWalkers(int n, size_t numPtcls) +template +void WalkerConfigurationsT::createWalkers(int n, size_t numPtcls) { if (walker_list_.empty()) { @@ -43,7 +50,7 @@ void WalkerConfigurations::createWalkers(int n, size_t numPtcls) { if (walker_list_.size() >= n) { - int iw = walker_list_.size(); //copy from the back + int iw = walker_list_.size(); // 
copy from the back for (int i = 0; i < n; ++i) { walker_list_.push_back(std::make_unique(*walker_list_[--iw])); @@ -68,8 +75,8 @@ void WalkerConfigurations::createWalkers(int n, size_t numPtcls) } } - -void WalkerConfigurations::resize(int numWalkers, size_t numPtcls) +template +void WalkerConfigurationsT::resize(int numWalkers, size_t numPtcls) { int dn = numWalkers - walker_list_.size(); if (dn > 0) @@ -84,13 +91,15 @@ void WalkerConfigurations::resize(int numWalkers, size_t numPtcls) } } -///returns the next valid iterator -WalkerConfigurations::iterator WalkerConfigurations::destroyWalkers(iterator first, iterator last) +/// returns the next valid iterator +template +typename WalkerConfigurationsT::iterator WalkerConfigurationsT::destroyWalkers(iterator first, iterator last) { return walker_list_.erase(first, last); } -void WalkerConfigurations::createWalkers(iterator first, iterator last) +template +void WalkerConfigurationsT::createWalkers(iterator first, iterator last) { destroyWalkers(walker_list_.begin(), walker_list_.end()); while (first != last) @@ -100,7 +109,8 @@ void WalkerConfigurations::createWalkers(iterator first, iterator last) } } -void WalkerConfigurations::destroyWalkers(int nw) +template +void WalkerConfigurationsT::destroyWalkers(int nw) { if (nw > walker_list_.size()) { @@ -112,7 +122,8 @@ void WalkerConfigurations::destroyWalkers(int nw) walker_list_.erase(walker_list_.begin() + nw, walker_list_.end()); } -void WalkerConfigurations::copyWalkers(iterator first, iterator last, iterator it) +template +void WalkerConfigurationsT::copyWalkers(iterator first, iterator last, iterator it) { while (first != last) { @@ -126,7 +137,8 @@ void WalkerConfigurations::copyWalkers(iterator first, iterator last, iterator i * * R + D + X */ -void WalkerConfigurations::reset() +template +void WalkerConfigurationsT::reset() { for (auto& walker : walker_list_) { @@ -135,7 +147,8 @@ void WalkerConfigurations::reset() } } -void 
WalkerConfigurations::putConfigurations(Walker_t::RealType* target, QMCTraits::FullPrecRealType* weights) const +template +void WalkerConfigurationsT::putConfigurations(RealType* target, FullPrecRealType* weights) const { for (const auto& walker : walker_list_) { @@ -146,4 +159,9 @@ void WalkerConfigurations::putConfigurations(Walker_t::RealType* target, QMCTrai } } +template class WalkerConfigurationsT; +template class WalkerConfigurationsT; +template class WalkerConfigurationsT>; +template class WalkerConfigurationsT>; + } // namespace qmcplusplus diff --git a/src/Particle/WalkerConfigurationsT.h b/src/Particle/WalkerConfigurationsT.h new file mode 100644 index 0000000000..83a471cc7a --- /dev/null +++ b/src/Particle/WalkerConfigurationsT.h @@ -0,0 +1,193 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign +// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_WALKERCONFIGURATIONST_H +#define QMCPLUSPLUS_WALKERCONFIGURATIONST_H +#include "Particle/ParticleSetTraits.h" +#include "Particle/Walker.h" +#include "Utilities/IteratorUtility.h" + +#include + +namespace qmcplusplus +{ +/** Monte Carlo Data of an ensemble + * + * The quantities are shared by all the nodes in a group + * - NumSamples number of samples + * - Weight total weight of a sample + * - Energy average energy of a sample + * - Variance variance + * - LivingFraction fraction of walkers alive each step. + */ +template +struct MCDataType +{ + T NumSamples; + T RNSamples; + T Weight; + T Energy; + T AlternateEnergy; + T Variance; + T R2Accepted; + T R2Proposed; + T LivingFraction; +}; + +/** A set of light weight walkers that are carried between driver sections and + * restart + */ +template +class WalkerConfigurationsT +{ +public: + /// walker type + using Walker_t = Walker, LatticeParticleTraits>; + using RealType = typename Walker_t::RealType; + using FullPrecRealType = typename ParticleSetTraits::FullPrecRealType; + /// container type of Walkers + using walker_list__t = std::vector>; + /// FIX: a type alias of iterator for an object should not be for just one + /// of many objects it holds. 
+ using iterator = typename walker_list__t::iterator; + /// const_iterator of Walker container + using const_iterator = typename walker_list__t::const_iterator; + + MCDataType EnsembleProperty; + + WalkerConfigurationsT(); + ~WalkerConfigurationsT(); + WalkerConfigurationsT(const WalkerConfigurationsT&) = delete; + WalkerConfigurationsT& operator=(const WalkerConfigurationsT&) = delete; + WalkerConfigurationsT(WalkerConfigurationsT&&) = default; + WalkerConfigurationsT& operator=(WalkerConfigurationsT&&) = default; + + /** create numWalkers Walkers + * + * Append Walkers to walker_list_. + */ + void createWalkers(int numWalkers, size_t numPtcls); + /** create walkers + * @param first walker iterator + * @param last walker iterator + */ + void createWalkers(iterator first, iterator last); + /** copy walkers + * @param first input walker iterator + * @param last input walker iterator + * @param start first target iterator + * + * No memory allocation is allowed. + */ + void copyWalkers(iterator first, iterator last, iterator start); + + /** destroy Walkers from itstart to itend + *@param first starting iterator of the walkers + *@param last ending iterator of the walkers + */ + iterator destroyWalkers(iterator first, iterator last); + + /** destroy Walkers + *@param nw number of walkers to be destroyed + */ + void destroyWalkers(int nw); + + /// clean up the walker list and make a new list + void resize(int numWalkers, size_t numPtcls); + + /// return the number of active walkers + inline size_t getActiveWalkers() const { return walker_list_.size(); } + /// return the total number of active walkers among a MPI group + inline size_t getGlobalNumWalkers() const { return walker_offsets_.empty() ? 
0 : walker_offsets_.back(); } + /// return the total number of active walkers among a MPI group + + inline void setWalkerOffsets(const std::vector& o) { walker_offsets_ = o; } + inline const std::vector& getWalkerOffsets() const { return walker_offsets_; } + + /// return the first iterator + inline iterator begin() { return walker_list_.begin(); } + /// return the last iterator, [begin(), end()) + inline iterator end() { return walker_list_.end(); } + + /// return the first const_iterator + inline const_iterator begin() const { return walker_list_.begin(); } + + /// return the last const_iterator [begin(), end()) + inline const_iterator end() const { return walker_list_.end(); } + /**@}*/ + + /** clear the walker_list_ without destroying them + * + * Provide std::vector::clear interface + */ + inline void clear() { walker_list_.clear(); } + + /** insert elements + * @param it locator where the inserting begins + * @param first starting iterator + * @param last ending iterator + * + * Provide std::vector::insert interface + */ + template + inline void insert(iterator it, INPUT_ITER first, INPUT_ITER last) + { + walker_list_.insert(it, first, last); + } + + /** add Walker_t* at the end + * @param awalker pointer to a walker + * + * Provide std::vector::push_back interface + */ + inline void push_back(std::unique_ptr awalker) { walker_list_.push_back(std::move(awalker)); } + + /** delete the last Walker_t* + * + * Provide std::vector::pop_back interface + */ + inline void pop_back() { walker_list_.pop_back(); } + + inline Walker_t* operator[](int i) { return walker_list_[i].get(); } + + inline const Walker_t* operator[](int i) const { return walker_list_[i].get(); } + + /** reset the Walkers + */ + void reset(); + + /// save the particle positions of all the walkers into target + void putConfigurations(RealType* target, FullPrecRealType* weights) const; + +protected: + /// a collection of walkers + walker_list__t walker_list_; + +private: + /** starting index of the 
walkers in a processor group + * + * walker_offsets_[0]=0 and walker_offsets_[walker_offsets_.size()-1]=total + * number of walkers in a group + * walker_offsets_[processorid+1]-walker_offsets_[processorid] is equal to + * the number of walkers on a processor, i.e., W.getActiveWalkers(). + * walker_offsets_ is added to handle parallel I/O with hdf5 + */ + std::vector walker_offsets_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Particle/createDistanceTable.h b/src/Particle/createDistanceTable.h index 45d305f670..6b8b5dc97f 100644 --- a/src/Particle/createDistanceTable.h +++ b/src/Particle/createDistanceTable.h @@ -14,61 +14,6 @@ #ifndef QMCPLUSPLUS_DISTANCETABLE_H #define QMCPLUSPLUS_DISTANCETABLE_H -#include "Particle/ParticleSet.h" +#include "Particle/createDistanceTableT.h" -namespace qmcplusplus -{ -/** Class to manage multiple DistanceTable objects. - * - * \date 2008-09-19 - * static data members are removed. DistanceTable::add functions - * are kept for compatibility only. New codes should use a member function - * of ParticleSet to add a distance table - * int ParticleSet::addTable(const ParticleSet& source) - * - * \deprecated There is only one instance of the data memebers of - * DistanceTable in an application and the data are shared by many objects. - * Note that static data members and functions are used - * (based on singleton and factory patterns). - *\todo DistanceTable should work as a factory, as well, to instantiate DistanceTable - * subject to different boundary conditions. - * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by DistanceTable - * to generically control the crystalline structure. 
- */ - -///free function to create a distable table of s-s -std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description); -std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description); - -inline std::unique_ptr createDistanceTable(ParticleSet& s, std::ostream& description) -{ - // during P-by-P move, the cost of single particle evaluation of distance tables - // is determined by the number of source particles. - // Thus the implementation selection is determined by the source particle set. - if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) - return createDistanceTableAAOMPTarget(s, description); - else - return createDistanceTableAA(s, description); -} - -///free function create a distable table of s-t -std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description); -std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, - ParticleSet& t, - std::ostream& description); - -inline std::unique_ptr createDistanceTable(const ParticleSet& s, - ParticleSet& t, - std::ostream& description) -{ - // during P-by-P move, the cost of single particle evaluation of distance tables - // is determined by the number of source particles. - // Thus the implementation selection is determined by the source particle set. - if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) - return createDistanceTableABOMPTarget(s, t, description); - else - return createDistanceTableAB(s, t, description); -} - -} // namespace qmcplusplus #endif diff --git a/src/Particle/createDistanceTableAA.cpp b/src/Particle/createDistanceTableAA.cpp deleted file mode 100644 index fccbdd9ff5..0000000000 --- a/src/Particle/createDistanceTableAA.cpp +++ /dev/null @@ -1,98 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. 
-// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAA.h" - -namespace qmcplusplus -{ -/** Adding SymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAA(ParticleSet& s, std::ostream& description) -{ - using RealType = OHMMS_PRECISION; - enum - { - DIM = OHMMS_DIM - }; - const int sc = s.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for similar particles (A-A):" << std::endl; - o << " source/target: " << s.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." 
<< std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s); - } - - description << o.str() << std::endl; - return dt; -} - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableAAOMPTarget.cpp b/src/Particle/createDistanceTableAAOMPTarget.cpp deleted file mode 100644 index 14680e07d3..0000000000 --- a/src/Particle/createDistanceTableAAOMPTarget.cpp +++ /dev/null @@ -1,98 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAAOMPTarget.h" - -namespace qmcplusplus -{ -/** Adding SymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAAOMPTarget(ParticleSet& s, std::ostream& description) -{ - using RealType = OHMMS_PRECISION; - enum - { - DIM = OHMMS_DIM - }; - const int sc = s.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for similar particles (A-A):" << std::endl; - o << " source/target: " << s.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s); - } - - description << o.str() << std::endl; - return dt; -} - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableAB.cpp b/src/Particle/createDistanceTableAB.cpp deleted file mode 100644 index 9bb7a595f7..0000000000 --- a/src/Particle/createDistanceTableAB.cpp +++ /dev/null @@ -1,99 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableAB.h" - -namespace qmcplusplus -{ -/** Adding AsymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableAB(const ParticleSet& s, ParticleSet& t, std::ostream& description) -{ - using RealType = ParticleSet::RealType; - enum - { - DIM = OHMMS_DIM - }; - const int sc = t.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for dissimilar particles (A-B):" << std::endl; - o << " source: " << s.getName() << " target: " << t.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s, t); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - - description << o.str() << std::endl; - return dt; -} - - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableABOMPTarget.cpp b/src/Particle/createDistanceTableABOMPTarget.cpp deleted file mode 100644 index 5da851837f..0000000000 --- a/src/Particle/createDistanceTableABOMPTarget.cpp +++ /dev/null @@ -1,101 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "Particle/createDistanceTable.h" -#include "Particle/DistanceTable.h" -#include "Particle/SoaDistanceTableABOMPTarget.h" - -namespace qmcplusplus -{ -/** Adding AsymmetricDTD to the list, e.g., el-el distance table - *\param s source/target particle set - *\return index of the distance table with the name - */ -std::unique_ptr createDistanceTableABOMPTarget(const ParticleSet& s, - ParticleSet& t, - std::ostream& description) -{ - using RealType = ParticleSet::RealType; - enum - { - DIM = OHMMS_DIM - }; - const int sc = t.getLattice().SuperCellEnum; - std::unique_ptr dt; - std::ostringstream o; - o << " Distance table for dissimilar particles (A-B):" << std::endl; - o << " source: " << s.getName() << " target: " << t.getName() << std::endl; - o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; - - if (sc == SUPERCELL_BULK) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic periodic cell in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 3D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 3D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_SLAB) - { - if (s.getLattice().DiagonalOnly) - { - o << " Distance computations use orthorhombic code for periodic cell in 2D." 
<< std::endl; - dt = std::make_unique>(s, t); - } - else - { - if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) - { - o << " Distance computations use general periodic cell in 2D with corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - else - { - o << " Distance computations use general periodic cell in 2D without corner image checks." << std::endl; - dt = std::make_unique>(s, t); - } - } - } - else if (sc == SUPERCELL_WIRE) - { - o << " Distance computations use periodic cell in one dimension." << std::endl; - dt = std::make_unique>(s, t); - } - else //open boundary condition - { - o << " Distance computations use open boundary conditions in 3D." << std::endl; - dt = std::make_unique>(s, t); - } - - description << o.str() << std::endl; - return dt; -} - - -} //namespace qmcplusplus diff --git a/src/Particle/createDistanceTableT.cpp b/src/Particle/createDistanceTableT.cpp new file mode 100644 index 0000000000..e3fba71204 --- /dev/null +++ b/src/Particle/createDistanceTableT.cpp @@ -0,0 +1,230 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/createDistanceTableT.h" + +#include "CPU/SIMD/algorithm.hpp" +#include "Particle/DistanceTableT.h" +#include "Particle/SoaDistanceTableAAT.h" +#include "Particle/SoaDistanceTableAATOMPTarget.h" +#include "Particle/SoaDistanceTableABT.h" +#include "Particle/SoaDistanceTableABTOMPTarget.h" + +namespace qmcplusplus +{ +/** Adding SymmetricDTD to the list, e.g., el-el distance table + *\param s source/target particle set + *\return index of the distance table with the name + */ +template +std::unique_ptr> createDistanceTableAAT(ParticleSetT& s, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = s.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for similar particles (A-A):" << std::endl; + o << " source/target: " << s.getName() << std::endl; + o << " Using structure-of-arrays (SoA) data layout" << std::endl; + + if (sc == SUPERCELL_BULK) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique>(s); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + else + { + o << " Distance computations use general periodic cell in " + "3D without corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + } + } + else if (sc == SUPERCELL_SLAB) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." 
+ << std::endl; + dt = std::make_unique>(s); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + else + { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + } + } + else if (sc == SUPERCELL_WIRE) + { + o << " Distance computations use periodic cell in one dimension." << std::endl; + dt = std::make_unique>(s); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." << std::endl; + dt = std::make_unique>(s); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> createDistanceTableAAT(ParticleSetT& t, + std::ostream& description); +template std::unique_ptr> createDistanceTableAAT(ParticleSetT& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableAAT>( + ParticleSetT>& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableAAT>( + ParticleSetT>& t, + std::ostream& description); + +/** Adding AsymmetricDTD to the list, e.g., el-el distance table + *\param s source/target particle set + *\return index of the distance table with the name + */ +template +std::unique_ptr> createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = t.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for dissimilar particles (A-B):" << std::endl; + o << " source: " << s.getName() << " target: " << t.getName() << std::endl; + o << " Using structure-of-arrays (SoA) data layout" << std::endl; + + if (sc == SUPERCELL_BULK) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance 
computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + o << " Distance computations use general periodic cell in " + "3D without corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + } + } + else if (sc == SUPERCELL_SLAB) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + } + } + else if (sc == SUPERCELL_WIRE) + { + o << " Distance computations use periodic cell in one dimension." << std::endl; + dt = std::make_unique>(s, t); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
<< std::endl; + dt = std::make_unique>(s, t); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); +template std::unique_ptr> createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableABT>( + const ParticleSetT>& s, + ParticleSetT>& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableABT>( + const ParticleSetT>& s, + ParticleSetT>& t, + std::ostream& description); +} // namespace qmcplusplus diff --git a/src/Particle/createDistanceTableT.h b/src/Particle/createDistanceTableT.h new file mode 100644 index 0000000000..882b5f4118 --- /dev/null +++ b/src/Particle/createDistanceTableT.h @@ -0,0 +1,83 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_DISTANCETABLET_H +#define QMCPLUSPLUS_DISTANCETABLET_H + +#include "Particle/ParticleSet.h" + +namespace qmcplusplus +{ +/** Class to manage multiple DistanceTable objects. + * + * \date 2008-09-19 + * static data members are removed. DistanceTable::add functions + * are kept for compatibility only. 
New code should use a member function + * of ParticleSet to add a distance table + * int ParticleSet::addTable(const ParticleSet& source) + * + * \deprecated There is only one instance of the data members of + * DistanceTable in an application and the data are shared by many objects. + * Note that static data members and functions are used + * (based on singleton and factory patterns). + *\todo DistanceTable should work as a factory, as well, to instantiate + *DistanceTable subject to different boundary conditions. + * Lattice/CrystalLattice.h and Lattice/CrystalLattice.cpp can be owned by + *DistanceTable to generically control the crystalline structure. + */ + +/// free function to create a distance table of s-s +template +std::unique_ptr> createDistanceTableAAT(ParticleSetT& s, std::ostream& description); + +template +std::unique_ptr> createDistanceTableAATOMPTarget(ParticleSetT& s, std::ostream& description); + +template +inline std::unique_ptr> createDistanceTableT(ParticleSetT& s, std::ostream& description) +{ + // during P-by-P move, the cost of single particle evaluation of distance + // tables is determined by the number of source particles. Thus the + // implementation selection is determined by the source particle set. 
+ if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) + return createDistanceTableAATOMPTarget(s, description); + else + return createDistanceTableAAT(s, description); +} + +/// free function to create a distance table of s-t +template +std::unique_ptr> createDistanceTableABT(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); + +template +std::unique_ptr> createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); + +template +inline std::unique_ptr> createDistanceTableT(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description) +{ + // during P-by-P move, the cost of single particle evaluation of distance + // tables is determined by the number of source particles. Thus the + // implementation selection is determined by the source particle set. + if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD) + return createDistanceTableABTOMPTarget(s, t, description); + else + return createDistanceTableABT(s, t, description); +} + +} // namespace qmcplusplus +#endif diff --git a/src/Particle/createDistanceTableTOMPTarget.cpp b/src/Particle/createDistanceTableTOMPTarget.cpp new file mode 100644 index 0000000000..0f6e5a27fd --- /dev/null +++ b/src/Particle/createDistanceTableTOMPTarget.cpp @@ -0,0 +1,228 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "Particle/createDistanceTableT.h" + +#include "CPU/SIMD/algorithm.hpp" +#include "Particle/DistanceTableT.h" +#include "Particle/SoaDistanceTableAATOMPTarget.h" +#include "Particle/SoaDistanceTableABTOMPTarget.h" + +namespace qmcplusplus +{ +/** Adding SymmetricDTD to the list, e.g., el-el distance table + *\param s source/target particle set + *\return index of the distance table with the name + */ +template +std::unique_ptr> createDistanceTableAATOMPTarget(ParticleSetT& s, std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = s.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for similar particles (A-A):" << std::endl; + o << " source/target: " << s.getName() << std::endl; + o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; + + if (sc == SUPERCELL_BULK) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique>(s); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + else + { + o << " Distance computations use general periodic cell in " + "3D without corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + } + } + else if (sc == SUPERCELL_SLAB) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." 
+ << std::endl; + dt = std::make_unique>(s); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + else + { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique>(s); + } + } + } + else if (sc == SUPERCELL_WIRE) + { + o << " Distance computations use periodic cell in one dimension." << std::endl; + dt = std::make_unique>(s); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." << std::endl; + dt = std::make_unique>(s); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> createDistanceTableAATOMPTarget(ParticleSetT& t, + std::ostream& description); +template std::unique_ptr> createDistanceTableAATOMPTarget(ParticleSetT& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableAATOMPTarget>( + ParticleSetT>& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableAATOMPTarget>( + ParticleSetT>& t, + std::ostream& description); + +/** Adding AsymmetricDTD to the list, e.g., el-el distance table + *\param s source/target particle set + *\return index of the distance table with the name + */ +template +std::unique_ptr> createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description) +{ + using RealType = typename ParticleSetT::RealType; + enum + { + DIM = OHMMS_DIM + }; + const int sc = t.getLattice().SuperCellEnum; + std::unique_ptr> dt; + std::ostringstream o; + o << " Distance table for dissimilar particles (A-B):" << std::endl; + o << " source: " << s.getName() << " target: " << t.getName() << std::endl; + o << " Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl; + + if (sc == SUPERCELL_BULK) + 
{ + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic periodic cell in " + "3D." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "3D with corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + o << " Distance computations use general periodic cell in " + "3D without corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + } + } + else if (sc == SUPERCELL_SLAB) + { + if (s.getLattice().DiagonalOnly) + { + o << " Distance computations use orthorhombic code for periodic " + "cell in 2D." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius) + { + o << " Distance computations use general periodic cell in " + "2D with corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + else + { + o << " Distance computations use general periodic cell in " + "2D without corner image checks." + << std::endl; + dt = std::make_unique>(s, t); + } + } + } + else if (sc == SUPERCELL_WIRE) + { + o << " Distance computations use periodic cell in one dimension." << std::endl; + dt = std::make_unique>(s, t); + } + else // open boundary condition + { + o << " Distance computations use open boundary conditions in 3D." 
<< std::endl; + dt = std::make_unique>(s, t); + } + + description << o.str() << std::endl; + return dt; +} + +template std::unique_ptr> createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); +template std::unique_ptr> createDistanceTableABTOMPTarget(const ParticleSetT& s, + ParticleSetT& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableABTOMPTarget>( + const ParticleSetT>& s, + ParticleSetT>& t, + std::ostream& description); +template std::unique_ptr>> createDistanceTableABTOMPTarget>( + const ParticleSetT>& s, + ParticleSetT>& t, + std::ostream& description); +} // namespace qmcplusplus diff --git a/src/Particle/tests/CMakeLists.txt b/src/Particle/tests/CMakeLists.txt index 4442e8ed22..bf3ebe04c9 100644 --- a/src/Particle/tests/CMakeLists.txt +++ b/src/Particle/tests/CMakeLists.txt @@ -34,7 +34,8 @@ set(UTEST_NAME deterministic-unit_${UTEST_EXE}) add_executable( ${UTEST_EXE} test_distance_table.cpp - test_SoaDistanceTableAA.cpp) + test_SoaDistanceTableAA.cpp + ) target_link_libraries(${UTEST_EXE} catch_main qmcparticle) if(USE_OBJECT_TARGET) target_link_libraries(${UTEST_EXE} qmcutil qmcparticle_omptarget) diff --git a/src/Particle/tests/test_SoaDistanceTableAA.cpp b/src/Particle/tests/test_SoaDistanceTableAA.cpp index a49bafcfec..320ff68b84 100644 --- a/src/Particle/tests/test_SoaDistanceTableAA.cpp +++ b/src/Particle/tests/test_SoaDistanceTableAA.cpp @@ -29,7 +29,7 @@ TEST_CASE("SoaDistanceTableAA compute_size", "[distance_table]") elec.create({6, 4}); // using open BC - SoaDistanceTableAA dt_ee(elec); + SoaDistanceTableAAT dt_ee(elec); const size_t Alignment = getAlignment(); diff --git a/src/Particle/tests/test_particle_pool.cpp b/src/Particle/tests/test_particle_pool.cpp index 71ed80f361..48a0aabc6d 100644 --- a/src/Particle/tests/test_particle_pool.cpp +++ b/src/Particle/tests/test_particle_pool.cpp @@ -16,7 +16,7 @@ #include "Message/Communicate.h" #include 
"OhmmsData/Libxml2Doc.h" #include "Particle/ParticleSetPool.h" - +#include "Particle/MCWalkerConfiguration.h" #include #include diff --git a/src/QMCDrivers/DMC/DMCFactoryNew.h b/src/QMCDrivers/DMC/DMCFactoryNew.h index 524f3f3a2f..913ea60680 100644 --- a/src/QMCDrivers/DMC/DMCFactoryNew.h +++ b/src/QMCDrivers/DMC/DMCFactoryNew.h @@ -15,10 +15,10 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCWaveFunctions/WaveFunctionPool.h" #include "Message/Communicate.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class MCPopulation; class ProjectData; diff --git a/src/QMCDrivers/MCPopulation.h b/src/QMCDrivers/MCPopulation.h index b19f043aa7..b66a86bca0 100644 --- a/src/QMCDrivers/MCPopulation.h +++ b/src/QMCDrivers/MCPopulation.h @@ -24,12 +24,7 @@ #include "QMCDrivers/WalkerElementsRef.h" #include "OhmmsPETE/OhmmsVector.h" #include "Utilities/FairDivide.h" - -// forward declaration -namespace optimize -{ -struct VariableSet; -} +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { @@ -38,13 +33,12 @@ class QMCHamiltonian; class MCPopulation { public: - using MCPWalker = Walker; + using MCPWalker = MCWalkerConfiguration::Walker_t; using WFBuffer = MCPWalker::WFBuffer_t; using RealType = QMCTraits::RealType; using Properties = MCPWalker::PropertyContainer_t; using IndexType = QMCTraits::IndexType; using FullPrecRealType = QMCTraits::FullPrecRealType; - using opt_variables_type = optimize::VariableSet; private: // Potential thread safety issue diff --git a/src/QMCDrivers/QMCDriver.h b/src/QMCDrivers/QMCDriver.h index 6f1f6bfdd3..2981558a59 100644 --- a/src/QMCDrivers/QMCDriver.h +++ b/src/QMCDrivers/QMCDriver.h @@ -35,6 +35,8 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" #include "QMCDrivers/SimpleFixedNodeBranch.h" +#include "Particle/MCWalkerConfiguration.h" + class Communicate; namespace qmcplusplus @@ -59,7 +61,6 @@ 
namespace qmcplusplus */ //forward declarations: Do not include headers if not needed -class MCWalkerConfiguration; class HDFWalkerOutput; class TraceManager; diff --git a/src/QMCDrivers/QMCDriverFactory.h b/src/QMCDrivers/QMCDriverFactory.h index c1dd191b8f..71560c8245 100644 --- a/src/QMCDrivers/QMCDriverFactory.h +++ b/src/QMCDrivers/QMCDriverFactory.h @@ -26,13 +26,13 @@ #include "QMCDrivers/MCPopulation.h" #include "Particle/ParticleSetPool.h" #include "Estimators/EstimatorManagerInput.h" +#include "Particle/MCWalkerConfiguration.h" class Communicate; namespace qmcplusplus { //forward declaration -class MCWalkerConfiguration; class QMCDriverInterface; class WaveFunctionPool; class HamiltonianPool; diff --git a/src/QMCDrivers/RMC/RMCFactory.h b/src/QMCDrivers/RMC/RMCFactory.h index fa52cfc6e5..90a12417de 100644 --- a/src/QMCDrivers/RMC/RMCFactory.h +++ b/src/QMCDrivers/RMC/RMCFactory.h @@ -14,10 +14,10 @@ #ifndef QMCPLUSPLUS_RMC_FACTORY_H #define QMCPLUSPLUS_RMC_FACTORY_H #include "QMCDrivers/QMCDriver.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class RMCFactory diff --git a/src/QMCDrivers/VMC/VMCBatched.h b/src/QMCDrivers/VMC/VMCBatched.h index 589e6ee6da..c404510167 100644 --- a/src/QMCDrivers/VMC/VMCBatched.h +++ b/src/QMCDrivers/VMC/VMCBatched.h @@ -18,7 +18,7 @@ #include "QMCDrivers/MCPopulation.h" #include "QMCDrivers/ContextForSteps.h" #include "QMCDrivers/GreenFunctionModifiers/DriftModifierBase.h" - +#include "Particle/SampleStack.h" #include "Utilities/Timer.h" namespace qmcplusplus diff --git a/src/QMCDrivers/VMC/VMCFactory.h b/src/QMCDrivers/VMC/VMCFactory.h index 42a277bf63..eca789f202 100644 --- a/src/QMCDrivers/VMC/VMCFactory.h +++ b/src/QMCDrivers/VMC/VMCFactory.h @@ -14,10 +14,10 @@ #ifndef QMCPLUSPLUS_VMC_FACTORY_H #define QMCPLUSPLUS_VMC_FACTORY_H #include "QMCDrivers/QMCDriver.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { -class ParticleSetPool; class 
HamiltonianPool; class VMCFactory diff --git a/src/QMCDrivers/VMC/VMCFactoryNew.h b/src/QMCDrivers/VMC/VMCFactoryNew.h index 2f79751df7..e1247a5d5a 100644 --- a/src/QMCDrivers/VMC/VMCFactoryNew.h +++ b/src/QMCDrivers/VMC/VMCFactoryNew.h @@ -18,11 +18,11 @@ #include "QMCDrivers/QMCDriverInterface.h" #include "QMCWaveFunctions/WaveFunctionPool.h" #include "Message/Communicate.h" - +#include "Particle/ParticleSetPool.h" +#include "Particle/SampleStack.h" namespace qmcplusplus { -class ParticleSetPool; class HamiltonianPool; class MCPopulation; class ProjectData; diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h index 14709d9bbb..cad2ccc400 100644 --- a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h +++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.h @@ -18,6 +18,7 @@ #include "QMCDrivers/WFOpt/QMCCostFunctionBase.h" #include "QMCDrivers/CloneManager.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "SampleStack.h" namespace qmcplusplus { diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp index 5c65f09cd7..f00bdcf1c3 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimize.cpp @@ -28,6 +28,7 @@ #include "CPU/Blasf.h" #include "Numerics/MatrixOperators.h" #include "Message/UniformCommunicateError.h" +#include "Particle/SampleStack.h" #include #ifdef HAVE_LMY_ENGINE #include "formic/utils/matrix.h" diff --git a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp index 186e725575..d431bb08ed 100644 --- a/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp +++ b/src/QMCDrivers/WFOpt/QMCFixedSampleLinearOptimizeBatched.cpp @@ -30,6 +30,8 @@ #include "Numerics/MatrixOperators.h" #include "EstimatorInputDelegates.h" #include "Message/UniformCommunicateError.h" +#include "Particle/SampleStack.h" + 
#include #ifdef HAVE_LMY_ENGINE #include "formic/utils/matrix.h" diff --git a/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h b/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h index d78ffb5ff4..010ef554ad 100644 --- a/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h +++ b/src/QMCDrivers/WFOpt/QMCWFOptFactoryNew.h @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_WFOPTFACTORYNEW_H #include "QMCDrivers/QMCDriverInterface.h" +#include "Particle/SampleStack.h" class Communicate; @@ -22,7 +23,6 @@ class MCPopulation; class WaveFunctionPool; class QMCHamiltonian; class TrialWaveFunction; -class SampleStack; class QMCFixedSampleLinearOptimizeBatched; class ProjectData; diff --git a/src/QMCDrivers/WalkerElementsRef.h b/src/QMCDrivers/WalkerElementsRef.h index 1a11de623c..d5d35a6bcc 100644 --- a/src/QMCDrivers/WalkerElementsRef.h +++ b/src/QMCDrivers/WalkerElementsRef.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2020 QMCPACK developers. // @@ -13,6 +13,7 @@ #define QMCPLUSPLUS_WALKERELEMENTSREF_H #include "Configuration.h" +#include "Particle/ParticleSetTraits.h" #include "Particle/Walker.h" namespace qmcplusplus @@ -22,28 +23,35 @@ class TrialWaveFunction; /** type for returning the walker and its elements from MCPopulation * - * have no expectations for the validity of the references in this structure past - * the context it was returned in. It should not be returned by a call in a + * have no expectations for the validity of the references in this structure + * past the context it was returned in. It should not be returned by a call in a * crowd or threaded context. 
- * + * * @ye-luo's "fat" walker * - * We need this if we want to "copyFrom" the whole fat walker when it comes off the line - * i.e. mpi. Insuring the "fat" walker is valid at the earliest possible point seems - * less likely to end in tears then just calling copyFrom random other places (hopefully) - * in time, in order to not access an invalid walker element. + * We need this if we want to "copyFrom" the whole fat walker when it comes off + * the line i.e. mpi. Insuring the "fat" walker is valid at the earliest + * possible point seems less likely to end in tears then just calling copyFrom + * random other places (hopefully) in time, in order to not access an invalid + * walker element. */ struct WalkerElementsRef { - /** to allow use of emplace back - */ - WalkerElementsRef(Walker& walker_in, ParticleSet& pset_in, TrialWaveFunction& twf_in) : walker(walker_in), pset(pset_in), twf(twf_in) {} -; - Walker& walker; - ParticleSet& pset; - TrialWaveFunction& twf; + using WalkerType = Walker, + LatticeParticleTraits>; + /** to allow use of emplace back + */ + WalkerElementsRef(WalkerType& walker_in, ParticleSet& pset_in, + TrialWaveFunction& twf_in) : + walker(walker_in), + pset(pset_in), + twf(twf_in){}; + + WalkerType& walker; + ParticleSet& pset; + TrialWaveFunction& twf; }; -} +} // namespace qmcplusplus #endif diff --git a/src/QMCDrivers/tests/SetupPools.h b/src/QMCDrivers/tests/SetupPools.h index ed43c7f1e1..88b5e12205 100644 --- a/src/QMCDrivers/tests/SetupPools.h +++ b/src/QMCDrivers/tests/SetupPools.h @@ -15,11 +15,10 @@ #include "Message/Communicate.h" #include "type_traits/template_types.hpp" #include "OhmmsData/Libxml2Doc.h" +#include "Particle/ParticleSetPool.h" namespace qmcplusplus { - -class ParticleSetPool; class WaveFunctionPool; class HamiltonianPool; diff --git a/src/QMCDrivers/tests/WalkerConsumer.h b/src/QMCDrivers/tests/WalkerConsumer.h index 689a2be280..91bc9675bf 100644 --- a/src/QMCDrivers/tests/WalkerConsumer.h +++ 
b/src/QMCDrivers/tests/WalkerConsumer.h @@ -1,6 +1,6 @@ ////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. // // Copyright (c) 2019 QMCPACK developers. // @@ -12,10 +12,12 @@ #ifndef QMCPLUSPLUS_WALKERCONSUMER_H #define QMCPLUSPLUS_WALKERCONSUMER_H -#include - +#include "Configuration.h" +#include "Particle/ParticleSetTraits.h" #include "Particle/Walker.h" +#include + namespace qmcplusplus { class ResourceCollection; @@ -29,32 +31,38 @@ namespace testing class WalkerConsumer { public: - std::vector>> walkers; - std::vector> walker_elecs_; - std::vector> walker_twfs_; - std::vector> walker_hamiltonians_; - - void initializeResources(const ResourceCollection& twf_resource) {} - - void addWalker(Walker& walker, - ParticleSet& elecs, - TrialWaveFunction& twf, - QMCHamiltonian& hamiltonian) - { - walkers.push_back(walker); - walker_elecs_.push_back(elecs); - walker_twfs_.push_back(twf); - walker_hamiltonians_.push_back(hamiltonian); - } - - void clearWalkers() - { - // We're clearing the refs to the objects not the referred to objects. 
- walkers.clear(); - walker_elecs_.clear(); - walker_twfs_.clear(); - walker_hamiltonians_.clear(); - } + using WalkerType = Walker, + LatticeParticleTraits>; + + std::vector> walkers; + std::vector> walker_elecs_; + std::vector> walker_twfs_; + std::vector> walker_hamiltonians_; + + void + initializeResources(const ResourceCollection& twf_resource) + { + } + + void + addWalker(WalkerType& walker, ParticleSet& elecs, TrialWaveFunction& twf, + QMCHamiltonian& hamiltonian) + { + walkers.push_back(walker); + walker_elecs_.push_back(elecs); + walker_twfs_.push_back(twf); + walker_hamiltonians_.push_back(hamiltonian); + } + + void + clearWalkers() + { + // We're clearing the refs to the objects not the referred to objects. + walkers.clear(); + walker_elecs_.clear(); + walker_twfs_.clear(); + walker_hamiltonians_.clear(); + } }; } // namespace testing diff --git a/src/QMCDrivers/tests/test_Crowd.cpp b/src/QMCDrivers/tests/test_Crowd.cpp index 96d5d98a1c..ea854406ce 100644 --- a/src/QMCDrivers/tests/test_Crowd.cpp +++ b/src/QMCDrivers/tests/test_Crowd.cpp @@ -29,7 +29,7 @@ namespace testing class CrowdWithWalkers { public: - using MCPWalker = Walker; + using MCPWalker = Crowd::MCPWalker; EstimatorManagerNew em; UPtr crowd_ptr; diff --git a/src/QMCDrivers/tests/test_DMCBatched.cpp b/src/QMCDrivers/tests/test_DMCBatched.cpp index 1a069efb3b..e846141452 100644 --- a/src/QMCDrivers/tests/test_DMCBatched.cpp +++ b/src/QMCDrivers/tests/test_DMCBatched.cpp @@ -20,6 +20,7 @@ #include "Concurrency/Info.hpp" #include "Concurrency/UtilityFunctions.hpp" #include "Platforms/Host/OutputManager.h" +#include "SampleStack.h" namespace qmcplusplus { diff --git a/src/QMCDrivers/tests/test_SFNBranch.cpp b/src/QMCDrivers/tests/test_SFNBranch.cpp index d2ccdc1d5d..5291e7d36f 100644 --- a/src/QMCDrivers/tests/test_SFNBranch.cpp +++ b/src/QMCDrivers/tests/test_SFNBranch.cpp @@ -26,7 +26,7 @@ namespace qmcplusplus { -using MCPWalker = Walker; +using MCPWalker = MCPopulation::MCPWalker; using 
RealType = double; namespace testing diff --git a/src/QMCHamiltonians/ECPotentialBuilder.h b/src/QMCHamiltonians/ECPotentialBuilder.h index aa0353dde3..e3b8ab8119 100644 --- a/src/QMCHamiltonians/ECPotentialBuilder.h +++ b/src/QMCHamiltonians/ECPotentialBuilder.h @@ -19,10 +19,10 @@ #include "QMCHamiltonians/NonLocalECPotential.h" #include "QMCHamiltonians/SOECPotential.h" #include "QMCHamiltonians/L2Potential.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { class QMCHamiltonian; -class ParticleSet; class TrialWaveFunction; struct ECPotentialBuilder : public MPIObjectBase, public QMCTraits diff --git a/src/QMCHamiltonians/HamiltonianPool.h b/src/QMCHamiltonians/HamiltonianPool.h index f3651b162e..5bbae1fd91 100644 --- a/src/QMCHamiltonians/HamiltonianPool.h +++ b/src/QMCHamiltonians/HamiltonianPool.h @@ -20,15 +20,16 @@ #include "QMCHamiltonians/HamiltonianFactory.h" #include "OhmmsData/OhmmsElementBase.h" #include "Message/MPIObjectBase.h" +#include "Particle/ParticleSet.h" +#include "Particle/ParticleSetPool.h" +#include "Particle/MCWalkerConfiguration.h" + #include struct Libxml2Document; namespace qmcplusplus { -class ParticleSet; -class MCWalkerConfiguration; -class ParticleSetPool; class WaveFunctionPool; /** @ingroup qmcapp diff --git a/src/QMCHamiltonians/NonLocalECPotential.h b/src/QMCHamiltonians/NonLocalECPotential.h index 40e5948712..52f7798174 100644 --- a/src/QMCHamiltonians/NonLocalECPotential.h +++ b/src/QMCHamiltonians/NonLocalECPotential.h @@ -22,6 +22,7 @@ #include "QMCHamiltonians/ForceBase.h" #include "QMCHamiltonians/NonLocalECPComponent.h" #include "Particle/NeighborLists.h" +#include "Particle/DistanceTable.h" namespace qmcplusplus { template diff --git a/src/QMCHamiltonians/OperatorBase.h b/src/QMCHamiltonians/OperatorBase.h index cd4641dd01..41c8cda129 100644 --- a/src/QMCHamiltonians/OperatorBase.h +++ b/src/QMCHamiltonians/OperatorBase.h @@ -34,12 +34,13 @@ #endif #include "QMCHamiltonians/Listener.hpp" #include 
"QMCWaveFunctions/OrbitalSetTraits.h" +#include "Particle/MCWalkerConfiguration.h" + #include #include // std::unique_ptr namespace qmcplusplus { -class MCWalkerConfiguration; /**@defgroup hamiltonian Hamiltonian group * @brief QMCHamiltonian and its component, OperatorBase diff --git a/src/QMCHamiltonians/QMCHamiltonian.h b/src/QMCHamiltonians/QMCHamiltonian.h index 11dda4ac9c..2b193e72dc 100644 --- a/src/QMCHamiltonians/QMCHamiltonian.h +++ b/src/QMCHamiltonians/QMCHamiltonian.h @@ -35,10 +35,10 @@ #include "Estimators/TraceManager.h" #endif #include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "Particle/MCWalkerConfiguration.h" namespace qmcplusplus { -class MCWalkerConfiguration; class HamiltonianFactory; class NonLocalECPotential; diff --git a/src/QMCWaveFunctions/BasisSetBase.h b/src/QMCWaveFunctions/BasisSetBase.h index 57d6fc3cf8..e8ccd8b0d1 100644 --- a/src/QMCWaveFunctions/BasisSetBase.h +++ b/src/QMCWaveFunctions/BasisSetBase.h @@ -21,8 +21,8 @@ #ifndef QMCPLUSPLUS_BASISSETBASE_H #define QMCPLUSPLUS_BASISSETBASE_H -#include "Particle/ParticleSet.h" -#include "Particle/VirtualParticleSet.h" +#include "Particle/ParticleSetT.h" +#include "Particle/VirtualParticleSetT.h" #include "QMCWaveFunctions/OrbitalSetTraits.h" #include "OMPTarget/OffloadAlignedAllocators.hpp" @@ -113,14 +113,14 @@ struct BasisSetBase : public OrbitalSetTraits ///resize the basis set virtual void setBasisSetSize(int nbs) = 0; - virtual void evaluateWithHessian(const ParticleSet& P, int iat) = 0; - virtual void evaluateWithThirdDeriv(const ParticleSet& P, int iat) = 0; - virtual void evaluateThirdDerivOnly(const ParticleSet& P, int iat) = 0; - virtual void evaluateForWalkerMove(const ParticleSet& P) = 0; - virtual void evaluateForWalkerMove(const ParticleSet& P, int iat) = 0; - virtual void evaluateForPtclMove(const ParticleSet& P, int iat) = 0; - virtual void evaluateAllForPtclMove(const ParticleSet& P, int iat) = 0; - virtual void evaluateForPtclMoveWithHessian(const ParticleSet& 
P, int iat) = 0; + virtual void evaluateWithHessian(const ParticleSetT& P, int iat) = 0; + virtual void evaluateWithThirdDeriv(const ParticleSetT& P, int iat) = 0; + virtual void evaluateThirdDerivOnly(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForWalkerMove(const ParticleSetT& P) = 0; + virtual void evaluateForWalkerMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateAllForPtclMove(const ParticleSetT& P, int iat) = 0; + virtual void evaluateForPtclMoveWithHessian(const ParticleSetT& P, int iat) = 0; }; /** Base for real basis set @@ -149,35 +149,35 @@ struct SoaBasisSetBase virtual void setBasisSetSize(int nbs) = 0; //Evaluates value, gradient, and laplacian for electron "iat". Parks them into a temporary data structure "vgl". - virtual void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) = 0; + virtual void evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) = 0; //Evaluates value, gradient, and laplacian for electron "iat". places them in a offload array for batched code. virtual void mw_evaluateVGL(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& vgl) = 0; //Evaluates value for electron "iat". places it in a offload array for batched code. virtual void mw_evaluateValue(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& v) = 0; //Evaluates value for all the electrons of the virtual particles. places it in a offload array for batched code. virtual void mw_evaluateValueVPs(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& vp_list, OffloadMWVArray& v) = 0; //Evaluates value, gradient, and Hessian for electron "iat". Parks them into a temporary data structure "vgh". 
- virtual void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) = 0; + virtual void evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) = 0; //Evaluates value, gradient, and Hessian, and Gradient Hessian for electron "iat". Parks them into a temporary data structure "vghgh". - virtual void evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) = 0; + virtual void evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) = 0; //Evaluates the x,y, and z components of ionic gradient associated with "jion" of value. Parks the raw data into "vgl" container. - virtual void evaluateGradSourceV(const ParticleSet& P, int iat, const ParticleSet& ions, int jion, vgl_type& vgl) = 0; + virtual void evaluateGradSourceV(const ParticleSetT& P, int iat, const ParticleSetT& ions, int jion, vgl_type& vgl) = 0; //Evaluates the x,y, and z components of ionic gradient associated with "jion" value, gradient, and laplacian. // Parks the raw data into "vghgh" container. - virtual void evaluateGradSourceVGL(const ParticleSet& P, + virtual void evaluateGradSourceVGL(const ParticleSetT& P, int iat, - const ParticleSet& ions, + const ParticleSetT& ions, int jion, vghgh_type& vghgh) = 0; - virtual void evaluateV(const ParticleSet& P, int iat, value_type* restrict vals) = 0; + virtual void evaluateV(const ParticleSetT& P, int iat, value_type* restrict vals) = 0; virtual bool is_S_orbital(int mo_idx, int ao_idx) { return false; } diff --git a/src/QMCWaveFunctions/BsplineFactory/ApplyPhaseC2R.hpp b/src/QMCWaveFunctions/BsplineFactory/ApplyPhaseC2R.hpp index 66b0edd94a..dc2114715f 100644 --- a/src/QMCWaveFunctions/BsplineFactory/ApplyPhaseC2R.hpp +++ b/src/QMCWaveFunctions/BsplineFactory/ApplyPhaseC2R.hpp @@ -11,6 +11,7 @@ #include "OMPTarget/OMPTargetMath.hpp" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h 
b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h index 2a69c08aaf..9804bf6336 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h @@ -20,179 +20,12 @@ #ifndef QMCPLUSPLUS_BSPLINE_READER_BASE_H #define QMCPLUSPLUS_BSPLINE_READER_BASE_H -#include "mpi/collectives.h" -#include "mpi/point2point.h" -#include +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" namespace qmcplusplus { -struct SPOSetInputInfo; - -/** - * Each SplineC2X needs a reader derived from BsplineReaderBase. - * This base class handles common chores - * - check_twists : read gvectors, set twists for folded bands if needed, and set the phase for the special K - * - set_grid : create the basic grid and boundary conditions for einspline - * Note that template is abused but it works. - */ -struct BsplineReaderBase -{ - ///pointer to the EinsplineSetBuilder - EinsplineSetBuilder* mybuilder; - ///communicator - Communicate* myComm; - ///mesh size - TinyVector MeshSize; - ///check the norm of orbitals - bool checkNorm; - ///save spline coefficients to storage - bool saveSplineCoefs; - ///apply orbital rotations - bool rotate; - ///map from spo index to band index - std::vector> spo2band; - - BsplineReaderBase(EinsplineSetBuilder* e); - - virtual ~BsplineReaderBase(); - - /** read gvectors and set the mesh, and prepare for einspline - */ - template - inline bool set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) - { - //This sets MeshSize from the input file - bool havePsig = mybuilder->ReadGvectors_ESHDF(); - - //If this MeshSize is not initialized, use the meshsize set by the input based on FFT grid and meshfactor - if (MeshSize[0] == 0) - MeshSize = mybuilder->MeshSize; - - app_log() << " Using meshsize=" << MeshSize << "\n vs input meshsize=" << mybuilder->MeshSize << std::endl; - - for (int j = 0; j < 3; ++j) - { - xyz_grid[j].start = 0.0; - xyz_grid[j].end = 1.0; - 
xyz_grid[j].num = MeshSize[j]; - - if (halfg[j]) - { - xyz_bc[j].lCode = ANTIPERIODIC; - xyz_bc[j].rCode = ANTIPERIODIC; - } - else - { - xyz_bc[j].lCode = PERIODIC; - xyz_bc[j].rCode = PERIODIC; - } - - xyz_bc[j].lVal = 0.0; - xyz_bc[j].rVal = 0.0; - } - return havePsig; - } - - /** initialize twist-related data for N orbitals - */ - template - inline void check_twists(SPE* bspline, const BandInfoGroup& bandgroup) - { - //init(orbitalSet,bspline); - bspline->PrimLattice = mybuilder->PrimCell; - bspline->GGt = dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); - - int N = bandgroup.getNumDistinctOrbitals(); - int numOrbs = bandgroup.getNumSPOs(); - - bspline->setOrbitalSetSize(numOrbs); - bspline->resizeStorage(N, N); - - bspline->first_spo = bandgroup.getFirstSPO(); - bspline->last_spo = bandgroup.getLastSPO(); - - int num = 0; - const std::vector& cur_bands = bandgroup.myBands; - for (int iorb = 0; iorb < N; iorb++) - { - int ti = cur_bands[iorb].TwistIndex; - bspline->kPoints[iorb] = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); - bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; - num += bspline->MakeTwoCopies[iorb] ? 2 : 1; - } - - app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs << std::endl; - - bspline->HalfG = 0; - TinyVector bconds = mybuilder->TargetPtcl.getLattice().BoxBConds; - if (!bspline->isComplex()) - { - //no k-point folding, single special k point (G, L ...) 
- TinyVector twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex]; - for (int i = 0; i < 3; i++) - if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) - bspline->HalfG[i] = 1; - else - bspline->HalfG[i] = 0; - app_log() << " TwistIndex = " << cur_bands[0].TwistIndex << " TwistAngle " << twist0 << std::endl; - app_log() << " HalfG = " << bspline->HalfG << std::endl; - } - app_log().flush(); - } - - /** return the path name in hdf5 - */ - inline std::string psi_g_path(int ti, int spin, int ib) - { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_g"; - return path.str(); - } - - /** return the path name in hdf5 - */ - inline std::string psi_r_path(int ti, int spin, int ib) - { - std::ostringstream path; - path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_r"; - return path.str(); - } - - /** read/bcast psi_g - * @param ti twist index - * @param spin spin index - * @param ib band index - * @param cG psi_g as stored in hdf5 - */ - void get_psi_g(int ti, int spin, int ib, Vector>& cG); - - /** create the actual spline sets - */ - virtual std::unique_ptr create_spline_set(const std::string& my_name, - int spin, - const BandInfoGroup& bandgroup) = 0; - - /** setting common parameters - */ - void setCommon(xmlNodePtr cur); - - /** create the spline after one of the kind is created */ - std::unique_ptr create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); - - /** create the spline set */ - std::unique_ptr create_spline_set(int spin, xmlNodePtr cur); - - /** Set the checkNorm variable */ - inline void setCheckNorm(bool new_checknorm) { checkNorm = new_checknorm; }; - - /** Set the orbital rotation flag. Rotations are applied to balance the real/imaginary components. 
*/ - inline void setRotate(bool new_rotate) { rotate = new_rotate; }; - - void initialize_spo2band(int spin, - const std::vector& bigspace, - SPOSetInfo& sposet, - std::vector& band2spo); -}; +using BsplineReaderBase = BsplineReaderBaseT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp similarity index 77% rename from src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp rename to src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp index 0c5cf0d2c9..ac7f6c3239 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp @@ -12,28 +12,26 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "BsplineReaderBaseT.h" -/** @file BsplineReaderBase.cpp - * - * Implement super function - */ -#include "EinsplineSetBuilder.h" -#include "BsplineReaderBase.h" -#include "OhmmsData/AttributeSet.h" #include "Message/CommOperators.h" +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" #include #include namespace qmcplusplus { -BsplineReaderBase::BsplineReaderBase(EinsplineSetBuilder* e) +template +BsplineReaderBaseT::BsplineReaderBaseT(EinsplineSetBuilderT* e) : mybuilder(e), MeshSize(0), checkNorm(true), saveSplineCoefs(false), rotate(true) { myComm = mybuilder->getCommunicator(); } -void BsplineReaderBase::get_psi_g(int ti, int spin, int ib, Vector>& cG) +template +void BsplineReaderBaseT::get_psi_g(int ti, int spin, int ib, Vector>& cG) { int ncg = 0; if (myComm->rank() == 0) @@ -50,7 +48,9 @@ void BsplineReaderBase::get_psi_g(int ti, int spin, int ib, Vectorbcast(cG); } -BsplineReaderBase::~BsplineReaderBase() {} +template +BsplineReaderBaseT::~BsplineReaderBaseT() +{} inline std::string 
make_bandinfo_filename(const std::string& root, int spin, @@ -67,7 +67,6 @@ inline std::string make_bandinfo_filename(const std::string& root, return oo.str(); } - inline std::string make_bandgroup_name(const std::string& root, int spin, int twist, @@ -82,7 +81,8 @@ inline std::string make_bandgroup_name(const std::string& root, return oo.str(); } -void BsplineReaderBase::setCommon(xmlNodePtr cur) +template +void BsplineReaderBaseT::setCommon(xmlNodePtr cur) { // check orbital normalization by default std::string checkOrbNorm("yes"); @@ -101,7 +101,8 @@ void BsplineReaderBase::setCommon(xmlNodePtr cur) saveSplineCoefs = saveCoefs == "yes"; } -std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePtr cur) +template +std::unique_ptr> BsplineReaderBaseT::create_spline_set(int spin, xmlNodePtr cur) { int ns(0); std::string spo_object_name; @@ -137,7 +138,10 @@ std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePt return create_spline_set(spo_object_name, spin, vals); } -std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info) +template +std::unique_ptr> BsplineReaderBaseT::create_spline_set(int spin, + xmlNodePtr cur, + SPOSetInputInfo& input_info) { std::string spo_object_name; OhmmsAttributeSet a; @@ -171,16 +175,17 @@ std::unique_ptr BsplineReaderBase::create_spline_set(int spin, xmlNodePt } /** build index tables to map a state to band with k-point folidng - * @param bigspace full BandInfo constructed by EinsplineSetBuilder - * @param sposet SPOSetInfo owned by someone, most likely EinsplinseSetBuilder - * @param spo2band spo2band[i] is the index in bigspace - * - * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i - */ -void BsplineReaderBase::initialize_spo2band(int spin, - const std::vector& bigspace, - SPOSetInfo& sposet, - std::vector& spo2band) + * @param bigspace full BandInfo constructed by EinsplineSetBuilder + * @param sposet SPOSetInfo owned by 
someone, most likely EinsplinseSetBuilder + * @param spo2band spo2band[i] is the index in bigspace + * + * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i + */ +template +void BsplineReaderBaseT::initialize_spo2band(int spin, + const std::vector& bigspace, + SPOSetInfo& sposet, + std::vector& spo2band) { spo2band.reserve(bigspace.size()); int ns = 0; @@ -199,7 +204,7 @@ void BsplineReaderBase::initialize_spo2band(int spin, } } - //write to a file + // write to a file const Communicate* comm = myComm; if (comm->rank()) return; @@ -212,15 +217,16 @@ void BsplineReaderBase::initialize_spo2band(int spin, std::array s; ns = 0; using PosType = QMCTraits::PosType; - o << "# Band State TwistIndex BandIndex Energy Kx Ky Kz K1 K2 K3 KmK " + o << "# Band State TwistIndex BandIndex Energy Kx Ky " + "Kz K1 K2 K3 KmK " << std::endl; for (int i = 0; i < bigspace.size(); ++i) { - int ti = bigspace[i].TwistIndex; - int bi = bigspace[i].BandIndex; - double e = bigspace[i].Energy; - int nd = (bigspace[i].MakeTwoCopies) ? 2 : 1; - PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); + int ti = bigspace[i].TwistIndex; + int bi = bigspace[i].BandIndex; + double e = bigspace[i].Energy; + int nd = (bigspace[i].MakeTwoCopies) ? 
2 : 1; + PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]); int s_size = std::snprintf(s.data(), s.size(), "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n", i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->primcell_kpoints[ti][0], mybuilder->primcell_kpoints[ti][1], mybuilder->primcell_kpoints[ti][2], nd); @@ -230,4 +236,10 @@ void BsplineReaderBase::initialize_spo2band(int spin, ns += nd; } } + +template class BsplineReaderBaseT; +template class BsplineReaderBaseT; +template class BsplineReaderBaseT>; +template class BsplineReaderBaseT>; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h new file mode 100644 index 0000000000..4da860e245 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h @@ -0,0 +1,203 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_BSPLINE_READER_BASET_H +#define QMCPLUSPLUS_BSPLINE_READER_BASET_H + +#include "Containers/OhmmsPETE/TinyVector.h" +#include "QMCWaveFunctions/BandInfo.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "mpi/collectives.h" +#include "mpi/point2point.h" +#include + +namespace qmcplusplus +{ +struct SPOSetInputInfo; +template +class EinsplineSetBuilderT; + +/** + * Each SplineC2X needs a reader derived from BsplineReaderBase. + * This base class handles common chores + * - check_twists : read gvectors, set twists for folded bands if needed, and + * set the phase for the special K + * - set_grid : create the basic grid and boundary conditions for einspline + * Note that template is abused but it works. + */ +template +class BsplineReaderBaseT +{ +public: + /// pointer to the EinsplineSetBuilder + EinsplineSetBuilderT* mybuilder; + /// communicator + Communicate* myComm; + /// mesh size + TinyVector MeshSize; + /// check the norm of orbitals + bool checkNorm; + /// save spline coefficients to storage + bool saveSplineCoefs; + /// apply orbital rotations + bool rotate; + /// map from spo index to band index + std::vector> spo2band; + + BsplineReaderBaseT(EinsplineSetBuilderT* e); + + virtual ~BsplineReaderBaseT(); + + /** read gvectors and set the mesh, and prepare for einspline + */ + template + inline bool set_grid(const TinyVector& halfg, GT* xyz_grid, BCT* xyz_bc) + { + // This sets MeshSize from the input file + bool havePsig = mybuilder->ReadGvectors_ESHDF(); + + // If this MeshSize is not initialized, use the meshsize set by the + // input based on FFT grid and meshfactor + if (MeshSize[0] == 0) + MeshSize = mybuilder->MeshSize; + + app_log() << " Using meshsize=" << MeshSize << "\n vs input 
meshsize=" << mybuilder->MeshSize << std::endl; + + for (int j = 0; j < 3; ++j) + { + xyz_grid[j].start = 0.0; + xyz_grid[j].end = 1.0; + xyz_grid[j].num = MeshSize[j]; + + if (halfg[j]) + { + xyz_bc[j].lCode = ANTIPERIODIC; + xyz_bc[j].rCode = ANTIPERIODIC; + } + else + { + xyz_bc[j].lCode = PERIODIC; + xyz_bc[j].rCode = PERIODIC; + } + + xyz_bc[j].lVal = 0.0; + xyz_bc[j].rVal = 0.0; + } + return havePsig; + } + + /** initialize twist-related data for N orbitals + */ + template + inline void check_twists(SPE* bspline, const BandInfoGroup& bandgroup) + { + // init(orbitalSet,bspline); + bspline->PrimLattice = mybuilder->PrimCell; + bspline->GGt = dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G); + + int N = bandgroup.getNumDistinctOrbitals(); + int numOrbs = bandgroup.getNumSPOs(); + + bspline->setOrbitalSetSize(numOrbs); + bspline->resizeStorage(N, N); + + bspline->first_spo = bandgroup.getFirstSPO(); + bspline->last_spo = bandgroup.getLastSPO(); + + int num = 0; + const std::vector& cur_bands = bandgroup.myBands; + for (int iorb = 0; iorb < N; iorb++) + { + int ti = cur_bands[iorb].TwistIndex; + bspline->kPoints[iorb] = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]); + bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies; + num += bspline->MakeTwoCopies[iorb] ? 2 : 1; + } + + app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs << std::endl; + + bspline->HalfG = 0; + TinyVector bconds = mybuilder->TargetPtcl.getLattice().BoxBConds; + if (!bspline->isComplex()) + { + // no k-point folding, single special k point (G, L ...) 
+ TinyVector twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex]; + for (int i = 0; i < 3; i++) + if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8))) + bspline->HalfG[i] = 1; + else + bspline->HalfG[i] = 0; + app_log() << " TwistIndex = " << cur_bands[0].TwistIndex << " TwistAngle " << twist0 << std::endl; + app_log() << " HalfG = " << bspline->HalfG << std::endl; + } + app_log().flush(); + } + + /** return the path name in hdf5 + */ + inline std::string psi_g_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_g"; + return path.str(); + } + + /** return the path name in hdf5 + */ + inline std::string psi_r_path(int ti, int spin, int ib) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_r"; + return path.str(); + } + + /** read/bcast psi_g + * @param ti twist index + * @param spin spin index + * @param ib band index + * @param cG psi_g as stored in hdf5 + */ + void get_psi_g(int ti, int spin, int ib, Vector>& cG); + + /** create the actual spline sets + */ + virtual std::unique_ptr> create_spline_set(const std::string& my_name, + int spin, + const BandInfoGroup& bandgroup) = 0; + + /** setting common parameters + */ + void setCommon(xmlNodePtr cur); + + /** create the spline after one of the kind is created */ + std::unique_ptr> create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info); + + /** create the spline set */ + std::unique_ptr> create_spline_set(int spin, xmlNodePtr cur); + + /** Set the checkNorm variable */ + inline void setCheckNorm(bool new_checknorm) { checkNorm = new_checknorm; }; + + /** Set the orbital rotation flag. Rotations are applied to balance the + * real/imaginary components. 
*/ + inline void setRotate(bool new_rotate) { rotate = new_rotate; }; + + void initialize_spo2band(int spin, + const std::vector& bigspace, + SPOSetInfo& sposet, + std::vector& band2spo); +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h index 6a5e880b0d..b219d91101 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSet.h @@ -21,230 +21,12 @@ #ifndef QMCPLUSPLUS_BSPLINESET_H #define QMCPLUSPLUS_BSPLINESET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "spline/einspline_engine.hpp" -#include "spline/einspline_util.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" namespace qmcplusplus { -/** BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R. - * Its derived template classes manage the storage and evaluation at given precision. - * BsplineSet also implements a few fallback routines in case optimized implementation is not necessary in the derived class. - */ -class BsplineSet : public SPOSet -{ -protected: - static const int D = DIM; - ///Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases - size_t MyIndex; - ///first index of the SPOs this Spline handles - size_t first_spo; - ///last index of the SPOs this Spline handles - size_t last_spo; - ///sign bits at the G/2 boundaries - TinyVector HalfG; - ///flags to unpack sin/cos - std::vector MakeTwoCopies; - /** kpoints for each unique orbitals. - * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated. - * Changing the sign requires updating all the evaluation code. 
- */ - std::vector kPoints; - ///remap splines to orbitals - aligned_vector BandIndexMap; - ///band offsets used for communication - std::vector offset; - -public: - BsplineSet(const std::string& my_name) : SPOSet(my_name), MyIndex(0), first_spo(0), last_spo(0) {} - - virtual bool isComplex() const = 0; - virtual std::string getKeyword() const = 0; - - auto& getHalfG() const { return HalfG; } - - inline void init_base(int n) - { - kPoints.resize(n); - MakeTwoCopies.resize(n); - BandIndexMap.resize(n); - for (int i = 0; i < n; i++) - BandIndexMap[i] = i; - } - - ///remap kpoints to group general kpoints & special kpoints - int remap_kpoints() - { - std::vector k_copy(kPoints); - const int nk = kPoints.size(); - int nCB = 0; - //two pass - for (int i = 0; i < nk; ++i) - { - if (MakeTwoCopies[i]) - { - kPoints[nCB] = k_copy[i]; - BandIndexMap[nCB++] = i; - } - } - int nRealBands = nCB; - for (int i = 0; i < nk; ++i) - { - if (!MakeTwoCopies[i]) - { - kPoints[nRealBands] = k_copy[i]; - BandIndexMap[nRealBands++] = i; - } - } - return nCB; //return the number of complex bands - } - - // propagate SPOSet virtual functions - using SPOSet::evaluateDetRatios; - using SPOSet::evaluateValue; - using SPOSet::evaluateVGH; - using SPOSet::evaluateVGHGH; - using SPOSet::evaluateVGL; - using SPOSet::finalizeConstruction; - using SPOSet::mw_evaluateDetRatios; - using SPOSet::mw_evaluateVGL; - using SPOSet::mw_evaluateVGLandDetRatioGrads; - - using SPOSet::acquireResource; - using SPOSet::createResource; - using SPOSet::releaseResource; - - std::unique_ptr makeClone() const override = 0; - - void setOrbitalSetSize(int norbs) override { OrbitalSetSize = norbs; } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector 
v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } - } - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override - { - assert(this == &spo_list.getLeader()); - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - - const size_t nw = spo_list.size(); - std::vector mw_psi_v; - std::vector mw_dpsi_v; - std::vector mw_d2psi_v; - RefVector psi_v_list; - RefVector dpsi_v_list; - RefVector d2psi_v_list; - mw_psi_v.reserve(nw); - mw_dpsi_v.reserve(nw); - mw_d2psi_v.reserve(nw); - psi_v_list.reserve(nw); - dpsi_v_list.reserve(nw); - d2psi_v_list.reserve(nw); - - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - mw_psi_v.clear(); - mw_dpsi_v.clear(); - mw_d2psi_v.clear(); - psi_v_list.clear(); - dpsi_v_list.clear(); - d2psi_v_list.clear(); - - for (int iw = 0; iw < nw; iw++) - { - mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols()); - mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); - mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); - psi_v_list.push_back(mw_psi_v.back()); - dpsi_v_list.push_back(mw_dpsi_v.back()); - d2psi_v_list.push_back(mw_d2psi_v.back()); - } - - mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - evaluateVGH(P, iat, v, g, 
h); - } - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); - GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); - evaluateVGHGH(P, iat, v, g, h, gh); - } - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - //Do nothing, since Einsplines don't explicitly depend on ion positions. - } - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using BsplineSet = BsplineSetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h new file mode 100644 index 0000000000..60d13101b3 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h @@ -0,0 +1,256 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file BsplineSetT.h + * + * BsplineSet is a SPOSet derived class and serves as a base class for B-spline + * SPO C2C/C2R/R2R implementation + */ +#ifndef QMCPLUSPLUS_BSPLINESETT_H +#define QMCPLUSPLUS_BSPLINESETT_H + +#include "QMCWaveFunctions/SPOSetT.h" +#include "spline/einspline_engine.hpp" +#include "spline/einspline_util.hpp" + +namespace qmcplusplus +{ +/** BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R. + * Its derived template classes manage the storage and evaluation at given + * precision. BsplineSet also implements a few fallback routines in case + * optimized implementation is not necessary in the derived class. 
+ */ +template +class BsplineSetT : public SPOSetT +{ +public: + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using GGGVector = typename SPOSetT::GGGVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + + using value_type = typename SPOSetT::ValueMatrix::value_type; + using grad_type = typename SPOSetT::GradMatrix::value_type; + + // used in derived classes + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; + + BsplineSetT(const std::string& my_name) : SPOSetT(my_name), MyIndex(0), first_spo(0), last_spo(0) {} + + virtual bool isComplex() const = 0; + virtual std::string getKeyword() const = 0; + + auto& getHalfG() const { return HalfG; } + + inline void init_base(int n) + { + kPoints.resize(n); + MakeTwoCopies.resize(n); + BandIndexMap.resize(n); + for (int i = 0; i < n; i++) + BandIndexMap[i] = i; + } + + /// remap kpoints to group general kpoints & special kpoints + int remap_kpoints() + { + std::vector k_copy(kPoints); + const int nk = kPoints.size(); + int nCB = 0; + // two pass + for (int i = 0; i < nk; ++i) + { + if (MakeTwoCopies[i]) + { + kPoints[nCB] = k_copy[i]; + BandIndexMap[nCB++] = i; + } + } + int nRealBands = nCB; + for (int i = 0; i < nk; ++i) + { + if (!MakeTwoCopies[i]) + { + kPoints[nRealBands] = k_copy[i]; + BandIndexMap[nRealBands++] = i; + } + } + return nCB; // return the number of complex bands + } + + std::unique_ptr> makeClone() const override = 0; + + void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + 
{ + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + this->evaluateVGL(P, iat, v, g, l); + } + } + + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override + { + assert(this == &spo_list.getLeader()); + const size_t nw = spo_list.size(); + std::vector mw_psi_v; + std::vector mw_dpsi_v; + std::vector mw_d2psi_v; + RefVector psi_v_list; + RefVector dpsi_v_list; + RefVector d2psi_v_list; + mw_psi_v.reserve(nw); + mw_dpsi_v.reserve(nw); + mw_d2psi_v.reserve(nw); + psi_v_list.reserve(nw); + dpsi_v_list.reserve(nw); + d2psi_v_list.reserve(nw); + + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + mw_psi_v.clear(); + mw_dpsi_v.clear(); + mw_d2psi_v.clear(); + psi_v_list.clear(); + dpsi_v_list.clear(); + d2psi_v_list.clear(); + + for (int iw = 0; iw < nw; iw++) + { + mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols()); + mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols()); + mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols()); + psi_v_list.push_back(mw_psi_v.back()); + dpsi_v_list.push_back(mw_dpsi_v.back()); + d2psi_v_list.push_back(mw_d2psi_v.back()); + } + + this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + this->evaluateVGH(P, iat, v, 
g, h); + } + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override + { + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols()); + GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols()); + this->evaluateVGHGH(P, iat, v, g, h, gh); + } + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override + { + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) override + { + // Do nothing, since Einsplines don't explicitly depend on ion + // positions. + } + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; + template + friend class HybridRepSetReaderT; + +protected: + static const int D = QMCTraits::DIM; + /// Index of this adoptor, when multiple adoptors are used for NUMA or + /// distributed cases + size_t MyIndex; + /// first index of the SPOs this Spline handles + size_t first_spo; + /// last index of the SPOs this Spline handles + size_t last_spo; + /// sign bits at the G/2 boundaries + TinyVector HalfG; + /// flags to unpack sin/cos + std::vector MakeTwoCopies; + /** kpoints for each unique orbitals. + * Note: for historic reason, this sign is opposite to what was used in DFT + * when orbitals were generated. Changing the sign requires updating all the + * evaluation code. 
+ */ + std::vector kPoints; + /// remap splines to orbitals + aligned_vector BandIndexMap; + /// band offsets used for communication + std::vector offset; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h index f7794dbba4..d44e0b20cc 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder.h @@ -23,269 +23,15 @@ #ifndef QMCPLUSPLUS_EINSPLINE_SET_BUILDER_H #define QMCPLUSPLUS_EINSPLINE_SET_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/BandInfo.h" -#include -#include - -#define PW_COEFF_NORM_TOLERANCE 1e-6 - -class Communicate; +#include "Configuration.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" namespace qmcplusplus { -///forward declaration of BsplineReaderBase -struct BsplineReaderBase; - -// Helper needed for TwistMap -struct Int3less -{ - bool operator()(const TinyVector& a, const TinyVector& b) const - { - if (a[0] > b[0]) - return false; - if (a[0] < b[0]) - return true; - if (a[1] > b[1]) - return false; - if (a[1] < b[1]) - return true; - if (a[2] > b[2]) - return false; - if (a[2] < b[2]) - return true; - return false; - } -}; -struct Int4less -{ - bool operator()(const TinyVector& a, const TinyVector& b) const - { - for (int i = 0; i < 4; i++) - { - if (a[i] > b[i]) - return false; - if (a[i] < b[i]) - return true; - } - return false; - } -}; - - -/** construct a name for spline SPO set - */ -struct H5OrbSet -{ - ///index for the spin set - int SpinSet; - ///number of orbitals that belong to this set - int NumOrbs; - ///name of the HDF5 file - std::filesystem::path FileName; - /** true if a < b - * - * The ordering - * - name - * - spin set - * - number of orbitals - */ - bool operator()(const H5OrbSet& a, const H5OrbSet& b) const - { - if (a.FileName == b.FileName) - { - if (a.SpinSet == b.SpinSet) - return 
a.NumOrbs < b.NumOrbs; - else - return a.SpinSet < b.SpinSet; - } - else - return a.FileName < b.FileName; - } - - H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) - : SpinSet(spinSet), NumOrbs(numOrbs), FileName(std::move(name)) - {} - H5OrbSet() = default; -}; - /** EinsplineSet builder */ -class EinsplineSetBuilder : public SPOSetBuilder -{ -public: - using PSetMap = std::map>; - using UnitCellType = CrystalLattice; - - ///reference to the particleset pool - const PSetMap& ParticleSets; - ///quantum particle set - ParticleSet& TargetPtcl; - ///ionic system - ParticleSet* SourcePtcl; - - /** Helper vector for sorting bands - */ - std::vector>> FullBands; - - /// reader to use BsplineReaderBase - std::unique_ptr MixedSplineReader; - - ///This is true if we have the orbital derivatives w.r.t. the ion positions - bool HaveOrbDerivs; - ///root XML node with href, sort, tilematrix, twistnum, source, precision,truncate,version - xmlNodePtr XMLRoot; - - std::map SPOSetMap; - - ///constructor - EinsplineSetBuilder(ParticleSet& p, const PSetMap& psets, Communicate* comm, xmlNodePtr cur); - - ///destructor - ~EinsplineSetBuilder() override; - - /** initialize the Antisymmetric wave function for electrons - * @param cur the current xml node - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - /** initialize with the existing SPOSet */ - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) override; - - ////////////////////////////////////// - // HDF5-related data and functions // - ////////////////////////////////////// - hdf_archive H5File; - std::filesystem::path H5FileName; - // HDF5 orbital file version - typedef enum - { - QMCPACK, - ESHDF - } FormatType; - FormatType Format; - TinyVector Version; - std::string parameterGroup, ionsGroup, eigenstatesGroup; - std::vector Occ; - bool ReadOrbitalInfo(bool skipChecks = false); - bool ReadOrbitalInfo_ESHDF(bool skipChecks = false); - void BroadcastOrbitalInfo(); - bool 
CheckLattice(); - - /** read gvectors for each twist - * @return true, if psi_g is found - */ - bool ReadGvectors_ESHDF(); - - Tensor Lattice, RecipLattice, LatticeInv, SuperLattice, GGt; - UnitCellType SuperCell, PrimCell, PrimCellInv; - int NumBands, NumElectrons, NumSpins, NumTwists; - int MaxNumGvecs; - double MeshFactor; - RealType MatchingTol; - TinyVector MeshSize; - std::vector>> Gvecs; - - Vector IonTypes; - Vector> IonPos; - // mapping the ions in the supercell to the primitive cell - std::vector Super2Prim; - - ///////////////////////////// - // Twist angle information // - ///////////////////////////// - // The "true" twist number after analyzing twistnum, twist XML input and h5 - int twist_num_; - // primitive cell k-points from DFT calculations - std::vector> primcell_kpoints; - // primitive cell to supercell tiling matrix - Tensor TileMatrix; - // This vector stores which twist indices will be used by this clone - std::vector> UseTwists; - std::vector IncludeTwists, DistinctTwists; - /// if false, splines are conceptually complex valued - bool use_real_splines_; - int NumDistinctOrbitals; - // This is true if the corresponding twist in DistinctTwists should - // should be used to generate two distinct orbitals from the real and - // imaginary parts. 
- std::vector MakeTwoCopies; - // This maps a 3-integer twist index into the twist number in the file - std::map, int, Int3less> TwistMap; - - bool TwistPair(PosType a, PosType b) const; - void TileIons(); - void OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks = false); - void OccupyBands_ESHDF(int spin, int sortBands, int numOrbs); - - //////////////////////////////// - // Atomic orbital information // - //////////////////////////////// - struct CenterInfo - { - std::vector lmax, spline_npoints, GroupID; - std::vector spline_radius, cutoff, inner_cutoff, non_overlapping_radius; - std::vector> ion_pos; - int Ncenters; - - CenterInfo() : Ncenters(0){}; - - void resize(int ncenters) - { - Ncenters = ncenters; - lmax.resize(ncenters, -1); - spline_npoints.resize(ncenters, -1); - GroupID.resize(ncenters, 0); - spline_radius.resize(ncenters, -1.0); - inner_cutoff.resize(ncenters, -1.0); - non_overlapping_radius.resize(ncenters, -1.0); - cutoff.resize(ncenters, -1.0); - ion_pos.resize(ncenters); - } - } AtomicCentersInfo; - - // This returns the path in the HDF5 file to the group for orbital - // with twist ti and band bi - std::string OrbitalPath(int ti, int bi); - - ///////////////////////////////////////////////////////////// - // Information to avoid storing the same orbitals twice in // - // spin-restricted calculations. 
// - ///////////////////////////////////////////////////////////// - int LastSpinSet, NumOrbitalsRead; - - std::string occ_format; - int particle_hole_pairs; - bool makeRotations; - -protected: - /** broadcast SortBands - * @param N number of state - * @param root true if it is the i/o node - */ - void bcastSortBands(int splin, int N, bool root); - - /** a specific but clean code path in createSPOSetFromXML, for PBC, double, ESHDF - * @param cur the current xml node - */ - void set_metadata(int numOrbs, - int twist_num_inp, - const TinyVector& twist_inp, - bool skipChecks = false); - - /** analyze twists of orbitals in h5 and determinine twist_num_ - * @param twist_num_inp twistnum XML input - * @param twist_inp twst XML input - */ - void AnalyzeTwists2(const int twist_num_inp, const TinyVector& twist_inp); - - /// twistnum_inp == -9999 to indicate no given input after parsing XML - static constexpr int TWISTNUM_NO_INPUT = -9999; - /// twist_inp[i] <= -9999 to indicate no given input after parsing XML - static constexpr double TWIST_NO_INPUT = -9999; -}; +using EinsplineSetBuilder = EinsplineSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h index 73b7f885c8..38be28d9a3 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.h @@ -19,32 +19,13 @@ #ifndef QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDER_H #define QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "BsplineFactory/EinsplineSetBuilder.h" -class Communicate; +#include "Configuration.h" +#include "QMCWaveFunctions/EinsplineSpinorSetBuilderT.h" namespace qmcplusplus { - -class EinsplineSpinorSetBuilder : public EinsplineSetBuilder -{ - using PSetMap = std::map>; - -public: - ///constructor - EinsplineSpinorSetBuilder(ParticleSet& p, const 
PSetMap& psets, Communicate* comm, xmlNodePtr cur) - : EinsplineSetBuilder(p, psets, comm, cur){}; - - ///destructor - ~EinsplineSpinorSetBuilder() override{}; - - /** initialize the Antisymmetric wave function for electrons - * @param cur the current xml node - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; -}; +using EinsplineSpinorSetBuilder = EinsplineSpinorSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h index 462496a73b..83dde48ba1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h @@ -17,749 +17,15 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H #define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALS_H -#include "Particle/DistanceTable.h" -#include "Particle/VirtualParticleSet.h" -#include "Numerics/SoaSphericalTensor.h" -#include "spline2/MultiBspline1D.hpp" -#include "Numerics/SmoothFunctions.hpp" -#include "hdf/hdf_archive.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" namespace qmcplusplus { -template -class HybridRepSetReader; - template -class AtomicOrbitals -{ -public: - static const int D = 3; - using AtomicSplineType = typename bspline_traits::SplineType; - using AtomicBCType = typename bspline_traits::BCType; - using AtomicSingleSplineType = UBspline_1d_d; - using PointType = TinyVector; - using value_type = ST; - - using vContainer_type = aligned_vector; - -private: - // near core cutoff - ST rmin; - // far from core cutoff, rmin_sqrt>=rmin - ST rmin_sqrt; - ST cutoff, cutoff_buffer, spline_radius, non_overlapping_radius; - int spline_npoints, BaseN; - int NumBands, Npad; - PointType center_pos; - const int lmax, lm_tot; - SoaSphericalTensor Ylm; - vContainer_type l_vals; - vContainer_type r_power_minus_l; - ///1D spline of radial functions of all the 
orbitals - std::shared_ptr> SplineInst; - - vContainer_type localV, localG, localL; - -public: - AtomicOrbitals(int Lmax) : lmax(Lmax), lm_tot((Lmax + 1) * (Lmax + 1)), Ylm(Lmax) - { - r_power_minus_l.resize(lm_tot); - l_vals.resize(lm_tot); - for (int l = 0; l <= lmax; l++) - for (int m = -l; m <= l; m++) - l_vals[l * (l + 1) + m] = l; - rmin = std::exp(std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); - rmin = std::max(rmin, std::numeric_limits::epsilon()); - rmin_sqrt = std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); - } - - // accessing functions, const only - ST getCutoff() const { return cutoff; } - ST getCutoffBuffer() const { return cutoff_buffer; } - ST getSplineRadius() const { return spline_radius; } - ST getNonOverlappingRadius() const { return non_overlapping_radius; } - int getSplineNpoints() const { return spline_npoints; } - int getLmax() const { return lmax; } - const PointType& getCenterPos() const { return center_pos; } - - inline void resizeStorage(size_t Nb) - { - NumBands = Nb; - Npad = getAlignedSize(Nb); - localV.resize(Npad * lm_tot); - localG.resize(Npad * lm_tot); - localL.resize(Npad * lm_tot); - create_spline(); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm, std::vector& offset) - { - gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); - } - - template - inline void set_info(const PT& R, - const VT& cutoff_in, - const VT& cutoff_buffer_in, - const VT& spline_radius_in, - const VT& non_overlapping_radius_in, - const int spline_npoints_in) - { - center_pos[0] = R[0]; - center_pos[1] = R[1]; - center_pos[2] = R[2]; - cutoff = cutoff_in; - cutoff_buffer = cutoff_buffer_in; - spline_radius = spline_radius_in; - spline_npoints = spline_npoints_in; - non_overlapping_radius = non_overlapping_radius_in; - BaseN = spline_npoints + 2; - } - - inline void create_spline() - { - AtomicBCType bc; - bc.lCode = FLAT; - bc.rCode = 
NATURAL; - Ugrid grid; - grid.start = 0.0; - grid.end = spline_radius; - grid.num = spline_npoints; - SplineInst = std::make_shared>(); - SplineInst->create(grid, bc, lm_tot * Npad); - } - - inline size_t getSplineSizeInBytes() const { return SplineInst->sizeInByte(); } - - inline void flush_zero() { SplineInst->flush_zero(); } - - inline void set_spline(AtomicSingleSplineType* spline, int lm, int ispline) - { - SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); - } - - bool read_splines(hdf_archive& h5f) - { - einspline_engine bigtable(SplineInst->getSplinePtr()); - int lmax_in = 0, spline_npoints_in = 0; - ST spline_radius_in; - if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) - return false; - if (!h5f.readEntry(spline_radius_in, "spline_radius") || spline_radius_in != spline_radius) - return false; - if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || spline_npoints_in != spline_npoints) - return false; - return h5f.readEntry(bigtable, "radial_spline"); - } - - bool write_splines(hdf_archive& h5f) - { - bool success = true; - success = success && h5f.writeEntry(spline_radius, "spline_radius"); - success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); - success = success && h5f.writeEntry(lmax, "l_max"); - success = success && h5f.writeEntry(center_pos, "position"); - einspline_engine bigtable(SplineInst->getSplinePtr()); - success = success && h5f.writeEntry(bigtable, "radial_spline"); - return success; - } - - //evaluate only V - template - inline void evaluate_v(const ST& r, const PointType& dr, VV& myV) - { - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r); - else - Ylm.evaluateV(0, 0, 1); - const ST* restrict Ylm_v = Ylm[0]; - - constexpr ST czero(0); - ST* restrict val = myV.data(); - ST* restrict local_val = localV.data(); - std::fill(myV.begin(), myV.end(), czero); - - SplineInst->evaluate(r, localV); - - for (size_t lm = 0; lm < lm_tot; lm++) - { -#pragma omp simd 
aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } - } - - template - inline void evaluateValues(const DISPL& Displacements, const int center_idx, const ST& r, VM& multi_myV) - { - if (r <= std::numeric_limits::epsilon()) - Ylm.evaluateV(0, 0, 1); - const ST* restrict Ylm_v = Ylm[0]; - - const size_t m = multi_myV.cols(); - constexpr ST czero(0); - std::fill(multi_myV.begin(), multi_myV.end(), czero); - SplineInst->evaluate(r, localV); - - for (int ivp = 0; ivp < Displacements.size(); ivp++) - { - PointType dr = Displacements[ivp][center_idx]; - if (r > std::numeric_limits::epsilon()) - Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); - - ST* restrict val = multi_myV[ivp]; - ST* restrict local_val = localV.data(); - for (size_t lm = 0; lm < lm_tot; lm++) - { -#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < m; ib++) - val[ib] += Ylm_v[lm] * local_val[ib]; - local_val += Npad; - } - } - } - - //evaluate VGL - template - inline void evaluate_vgl(const ST& r, const PointType& dr, VV& myV, GV& myG, VV& myL) - { - ST drx, dry, drz, rhatx, rhaty, rhatz, rinv; - if (r > rmin) - { - rinv = 1.0 / r; - } - else - { - rinv = 0; - } - drx = dr[0]; - dry = dr[1]; - drz = dr[2]; - rhatx = drx * rinv; - rhaty = dry * rinv; - rhatz = drz * rinv; - - Ylm.evaluateVGL(drx, dry, drz); - const ST* restrict Ylm_v = Ylm[0]; - const ST* restrict Ylm_gx = Ylm[1]; - const ST* restrict Ylm_gy = Ylm[2]; - const ST* restrict Ylm_gz = Ylm[3]; - - ST* restrict g0 = myG.data(0); - ST* restrict g1 = myG.data(1); - ST* restrict g2 = myG.data(2); - constexpr ST czero(0), cone(1), chalf(0.5); - std::fill(myV.begin(), myV.end(), czero); - std::fill(g0, g0 + Npad, czero); - std::fill(g1, g1 + Npad, czero); - std::fill(g2, g2 + Npad, czero); - std::fill(myL.begin(), myL.end(), czero); - ST* restrict val = myV.data(); - ST* restrict lapl = myL.data(); - ST* 
restrict local_val = localV.data(); - ST* restrict local_grad = localG.data(); - ST* restrict local_lapl = localL.data(); - - SplineInst->evaluate_vgl(r, localV, localG, localL); - - if (r > rmin_sqrt) - { - // far from core - r_power_minus_l[0] = cone; - ST r_power_temp = cone; - for (int l = 1; l <= lmax; l++) - { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) - { - const ST& l_val = l_vals[lm]; - const ST& r_power = r_power_minus_l[lm]; - const ST Ylm_rescale = Ylm_v[lm] * r_power; - const ST rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_v = local_val[ib]; - const ST local_g = local_grad[ib]; - const ST local_l = local_lapl[ib]; - // value - const ST Vpart = l_val * rinv * local_v; - val[ib] += Ylm_rescale * local_v; - - // grad - const ST factor1 = local_g * Ylm_rescale; - const ST factor2 = local_v * r_power; - const ST factor3 = -Vpart * Ylm_rescale; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; - - // laplacian - lapl[ib] += (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * Ylm_rescale + (local_g - Vpart) * rhat_dot_G; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else if (r > rmin) - { - // the possibility of reaching here is very very low - std::cout << "Warning: an electron is very close to an ion, distance=" << r << " be careful!" 
<< std::endl; - // near core, kill divergence in the laplacian - r_power_minus_l[0] = cone; - ST r_power_temp = cone; - for (int l = 1; l <= lmax; l++) - { - r_power_temp *= rinv; - for (int m = -l, lm = l * l; m <= l; m++, lm++) - r_power_minus_l[lm] = r_power_temp; - } - - for (size_t lm = 0; lm < lm_tot; lm++) - { - const ST& l_val = l_vals[lm]; - const ST& r_power = r_power_minus_l[lm]; - const ST Ylm_rescale = Ylm_v[lm] * r_power; - const ST rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r; -#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_v = local_val[ib]; - const ST local_g = local_grad[ib]; - const ST local_l = local_lapl[ib]; - // value - const ST Vpart = Ylm_rescale * local_v; - val[ib] += Vpart; - - // grad - const ST factor1 = local_g * Ylm_rescale; - const ST factor2 = local_v * r_power; - const ST factor3 = -l_val * Vpart * rinv; - g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; - g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; - g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; - - // laplacian - lapl[ib] += local_l * (cone - chalf * l_val) * (3 * Ylm_rescale + rhat_dot_G); - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - } - } - else - { - std::cout << "Warning: an electron is on top of an ion!" 
<< std::endl; - // strictly zero - -#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - // value - val[ib] = Ylm_v[0] * local_val[ib]; - - // laplacian - lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; - } - local_val += Npad; - local_grad += Npad; - local_lapl += Npad; - if (lm_tot > 0) - { - //std::cout << std::endl; - for (size_t lm = 1; lm < 4; lm++) - { -#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) - for (size_t ib = 0; ib < myV.size(); ib++) - { - const ST local_g = local_grad[ib]; - // grad - g0[ib] += local_g * Ylm_gx[lm]; - g1[ib] += local_g * Ylm_gy[lm]; - g2[ib] += local_g * Ylm_gz[lm]; - } - local_grad += Npad; - } - } - } - } - - template - void evaluate_vgh(const ST& r, const PointType& dr, VV& myV, GV& myG, HT& myH) - { - //Needed to do tensor product here - APP_ABORT("AtomicOrbitals::evaluate_vgh"); - } -}; +using AtomicOrbitals = AtomicOrbitalsT; template -class HybridRepCenterOrbitals -{ -public: - static const int D = 3; - using PointType = typename AtomicOrbitals::PointType; - using RealType = typename DistanceTable::RealType; - using PosType = typename DistanceTable::PosType; - - enum class Region - { - INSIDE, // within the buffer shell - BUFFER, // in the buffer region - INTER // interstitial area - }; - - struct LocationSmoothingInfo - { - ///r from distance table - RealType dist_r; - ///dr from distance table - PosType dist_dr; - ///for APBC - PointType r_image; - /// region of the location - Region region; - ///smooth function value - RealType f; - ///smooth function first derivative - RealType df_dr; - ///smooth function second derivative - RealType d2f_dr2; - }; - -private: - ///atomic centers - std::vector> AtomicCenters; - ///table index - int myTableID; - ///mapping supercell to primitive cell - std::vector Super2Prim; - ///smoothing schemes - enum class smoothing_schemes - { - CONSISTENT = 0, - SMOOTHALL, - SMOOTHPARTIAL - } 
smooth_scheme; - /// smoothing function - smoothing_functions smooth_func_id; - - /// select a region (within the buffer shell, in the buffer, interstitial region) and compute the smoothing function if in the buffer. - inline void selectRegionAndComputeSmoothing(const ST& cutoff_buffer, - const ST& cutoff, - LocationSmoothingInfo& info) const - { - const RealType r = info.dist_r; - if (r < cutoff_buffer) - info.region = Region::INSIDE; - else if (r < cutoff) - { - constexpr RealType cone(1); - const RealType scale = cone / (cutoff - cutoff_buffer); - const RealType x = (r - cutoff_buffer) * scale; - info.f = smoothing(smooth_func_id, x, info.df_dr, info.d2f_dr2); - info.df_dr *= scale; - info.d2f_dr2 *= scale * scale; - info.region = Region::BUFFER; - } - else - info.region = Region::INTER; - } - -public: - HybridRepCenterOrbitals() {} - - void set_info(const ParticleSet& ions, ParticleSet& els, const std::vector& mapping) - { - myTableID = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); - Super2Prim = mapping; - } - - inline void resizeStorage(size_t Nb) - { - size_t SplineCoefsBytes = 0; - - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - AtomicCenters[ic].resizeStorage(Nb); - SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); - } - - app_log() << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " - << "for the atomic radial splines in hybrid orbital representation" << std::endl; - } - - void bcast_tables(Communicate* comm) - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].bcast_tables(comm); - } - - void gather_atomic_tables(Communicate* comm, std::vector& offset) - { - if (comm->size() == 1) - return; - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].gather_tables(comm, offset); - } - - inline void flush_zero() - { - for (int ic = 0; ic < AtomicCenters.size(); ic++) - AtomicCenters[ic].flush_zero(); - } - - bool read_splines(hdf_archive& h5f) - { - bool success = true; - 
size_t ncenter; - - try - { - h5f.push("atomic_centers", false); - } - catch (...) - { - success = false; - } - success = success && h5f.readEntry(ncenter, "number_of_centers"); - if (!success) - return success; - if (ncenter != AtomicCenters.size()) - success = false; - // read splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - std::ostringstream gname; - gname << "center_" << ic; - try - { - h5f.push(gname.str().c_str(), false); - } - catch (...) - { - success = false; - } - success = success && AtomicCenters[ic].read_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - bool write_splines(hdf_archive& h5f) - { - bool success = true; - int ncenter = AtomicCenters.size(); - try - { - h5f.push("atomic_centers", true); - } - catch (...) - { - success = false; - } - success = success && h5f.writeEntry(ncenter, "number_of_centers"); - // write splines of each center - for (int ic = 0; ic < AtomicCenters.size(); ic++) - { - std::ostringstream gname; - gname << "center_" << ic; - try - { - h5f.push(gname.str().c_str(), true); - } - catch (...) 
- { - success = false; - } - success = success && AtomicCenters[ic].write_splines(h5f); - h5f.pop(); - } - h5f.pop(); - return success; - } - - template - inline int get_bc_sign(const PointType& r, - const PointType& r_image, - const Cell& PrimLattice, - TinyVector& HalfG) const - { - int bc_sign = 0; - PointType shift_unit = PrimLattice.toUnit(r - r_image); - for (int i = 0; i < D; i++) - { - ST img = round(shift_unit[i]); - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - //evaluate only V - template - inline void evaluate_v(const ParticleSet& P, const int iat, VV& myV, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_v(info.dist_r, dr, myV); - } - } - - /* check if the batched algorithm is safe to operate - * @param VP virtual particle set - * @return true if it is safe - * - * When the reference electron in the NLPP evaluation has a distance larger than the non overlapping radius of the reference center. - * Some qudrature points may get its SPOs evaluated from the nearest center which is not the reference center. - * The batched algorthm forces the evaluation on the reference center and introduce some error. - * In this case, the non-batched algorithm should be used. 
- */ - bool is_batched_safe(const VirtualParticleSet& VP) const - { - const int center_idx = VP.refSourcePtcl; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - return VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < - myCenter.getNonOverlappingRadius(); - } - - // C2C, C2R cases - template - inline void evaluateValuesC2X(const VirtualParticleSet& VP, VM& multi_myV, LocationSmoothingInfo& info) - { - const int center_idx = VP.refSourcePtcl; - info.dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, info.dist_r, multi_myV); - } - - // R2R case - template - inline void evaluateValuesR2R(const VirtualParticleSet& VP, - const Cell& PrimLattice, - TinyVector& HalfG, - VM& multi_myV, - SV& bc_signs, - LocationSmoothingInfo& info) - { - const int center_idx = VP.refSourcePtcl; - info.dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); - for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) - bc_signs[ivp] = get_bc_sign(VP.R[ivp], myCenter.getCenterPos() - displ[ivp][center_idx], PrimLattice, HalfG); - myCenter.evaluateValues(displ, center_idx, info.dist_r, multi_myV); - } - } - - //evaluate only VGL - template - inline void evaluate_vgl(const ParticleSet& P, const int iat, VV& myV, GV& myG, VV& myL, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, 
info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgl(info.dist_r, dr, myV, myG, myL); - } - } - - //evaluate only VGH - template - inline void evaluate_vgh(const ParticleSet& P, const int iat, VV& myV, GV& myG, HT& myH, LocationSmoothingInfo& info) - { - const auto& ei_dist = P.getDistTableAB(myTableID); - const int center_idx = ei_dist.get_first_neighbor(iat, info.dist_r, info.dist_dr, P.getActivePtcl() == iat); - auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; - selectRegionAndComputeSmoothing(myCenter.getCutoffBuffer(), myCenter.getCutoff(), info); - if (info.region != Region::INTER) - { - const PointType dr(-info.dist_dr[0], -info.dist_dr[1], -info.dist_dr[2]); - info.r_image = myCenter.getCenterPos() + dr; - myCenter.evaluate_vgh(info.dist_r, dr, myV, myG, myH); - } - } - - // interpolate buffer region, value only - template - inline void interpolate_buffer_v(VV& psi, const VV& psi_AO, const RealType f) const - { - constexpr RealType cone(1); - for (size_t i = 0; i < psi.size(); i++) - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - - // interpolate buffer region, value, gradients and laplacian - template - inline void interpolate_buffer_vgl(VV& psi, - GV& dpsi, - VV& d2psi, - const VV& psi_AO, - const GV& dpsi_AO, - const VV& d2psi_AO, - const LocationSmoothingInfo& info) const - { - constexpr RealType cone(1), ctwo(2); - const RealType rinv(1.0 / info.dist_r); - auto& dist_dr = info.dist_dr; - auto& f = info.f; - auto& df_dr = info.df_dr; - auto& d2f_dr2 = info.d2f_dr2; - if (smooth_scheme == smoothing_schemes::CONSISTENT) - for (size_t i = 0; i < psi.size(); i++) - { // psi, dpsi, d2psi are all consistent - d2psi[i] = 
d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + - (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) + df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHALL) - for (size_t i = 0; i < psi.size(); i++) - { - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) - for (size_t i = 0; i < psi.size(); i++) - { // dpsi, d2psi are consistent but psi is not. - d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); - dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); - psi[i] = psi_AO[i] * f + psi[i] * (cone - f); - } - else - throw std::runtime_error("Unknown smooth scheme!"); - } - - template - friend class qmcplusplus::HybridRepSetReader; -}; - -extern template class AtomicOrbitals; -extern template class AtomicOrbitals; -extern template class HybridRepCenterOrbitals; -extern template class HybridRepCenterOrbitals; +using HybridRepCenterOrbitals = HybridRepCenterOrbitalsT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp similarity index 62% rename from src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp rename to src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp index 9f92bd0ea7..da978b3647 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.cpp @@ -10,12 +10,14 @@ ////////////////////////////////////////////////////////////////////////////////////// -#include "HybridRepCenterOrbitals.h" +#include
"HybridRepCenterOrbitalsT.h" namespace qmcplusplus { -template class AtomicOrbitals; -template class AtomicOrbitals; -template class HybridRepCenterOrbitals; -template class HybridRepCenterOrbitals; +template class AtomicOrbitalsT; +template class AtomicOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; +template class HybridRepCenterOrbitalsT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h new file mode 100644 index 0000000000..f31dd3cf83 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h @@ -0,0 +1,744 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALST_H +#define QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALST_H + +#include "Numerics/SmoothFunctions.hpp" +#include "Numerics/SoaSphericalTensor.h" +#include "Particle/DistanceTableT.h" +#include "Particle/VirtualParticleSetT.h" +#include "hdf/hdf_archive.h" +#include "spline2/MultiBspline1D.hpp" + +namespace qmcplusplus +{ +template +class HybridRepSetReaderT; + +template +class AtomicOrbitalsT +{ +public: + static const int D = 3; + using AtomicSplineType = typename bspline_traits::SplineType; + using AtomicBCType = typename bspline_traits::BCType; + using AtomicSingleSplineType = UBspline_1d_d; + using PointType = TinyVector; + using value_type = T; + + using vContainer_type = aligned_vector; + +private: + // near core cutoff + T rmin; + // far from core cutoff, rmin_sqrt>=rmin + T rmin_sqrt; + T cutoff, cutoff_buffer, spline_radius, non_overlapping_radius; + int spline_npoints, BaseN; + int NumBands, Npad; + PointType center_pos; + const int lmax, lm_tot; + SoaSphericalTensor Ylm; + vContainer_type l_vals; + vContainer_type r_power_minus_l; + /// 1D spline of radial functions of all the orbitals + std::shared_ptr> SplineInst; + + vContainer_type localV, localG, localL; + +public: + AtomicOrbitalsT(int Lmax) : lmax(Lmax), lm_tot((Lmax + 1) * (Lmax + 1)), Ylm(Lmax) + { + r_power_minus_l.resize(lm_tot); + l_vals.resize(lm_tot); + for (int l = 0; l <= lmax; l++) + for (int m = -l; m <= l; m++) + l_vals[l * (l + 1) + m] = l; + rmin = std::exp(std::log(std::numeric_limits::min()) / std::max(Lmax, 1)); + rmin = std::max(rmin, std::numeric_limits::epsilon()); + rmin_sqrt = std::max(rmin, std::sqrt(std::numeric_limits::epsilon())); + } + + // accessing functions, const only + T getCutoff() 
const { return cutoff; } + T getCutoffBuffer() const { return cutoff_buffer; } + T getSplineRadius() const { return spline_radius; } + T getNonOverlappingRadius() const { return non_overlapping_radius; } + int getSplineNpoints() const { return spline_npoints; } + int getLmax() const { return lmax; } + const PointType& getCenterPos() const { return center_pos; } + + inline void resizeStorage(size_t Nb) + { + NumBands = Nb; + Npad = getAlignedSize(Nb); + localV.resize(Npad * lm_tot); + localG.resize(Npad * lm_tot); + localL.resize(Npad * lm_tot); + create_spline(); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm, std::vector& offset) + { + gatherv(comm, SplineInst->getSplinePtr(), Npad, offset); + } + + template + inline void set_info(const PT& R, + const VT& cutoff_in, + const VT& cutoff_buffer_in, + const VT& spline_radius_in, + const VT& non_overlapping_radius_in, + const int spline_npoints_in) + { + center_pos[0] = R[0]; + center_pos[1] = R[1]; + center_pos[2] = R[2]; + cutoff = cutoff_in; + cutoff_buffer = cutoff_buffer_in; + spline_radius = spline_radius_in; + spline_npoints = spline_npoints_in; + non_overlapping_radius = non_overlapping_radius_in; + BaseN = spline_npoints + 2; + } + + inline void create_spline() + { + AtomicBCType bc; + bc.lCode = FLAT; + bc.rCode = NATURAL; + Ugrid grid; + grid.start = 0.0; + grid.end = spline_radius; + grid.num = spline_npoints; + SplineInst = std::make_shared>(); + SplineInst->create(grid, bc, lm_tot * Npad); + } + + inline size_t getSplineSizeInBytes() const { return SplineInst->sizeInByte(); } + + inline void flush_zero() { SplineInst->flush_zero(); } + + inline void set_spline(AtomicSingleSplineType* spline, int lm, int ispline) + { + SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN); + } + + bool read_splines(hdf_archive& h5f) + { + einspline_engine bigtable(SplineInst->getSplinePtr()); + int lmax_in = 0, 
spline_npoints_in = 0; + T spline_radius_in; + if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax) + return false; + if (!h5f.readEntry(spline_radius_in, "spline_radius") || spline_radius_in != spline_radius) + return false; + if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || spline_npoints_in != spline_npoints) + return false; + return h5f.readEntry(bigtable, "radial_spline"); + } + + bool write_splines(hdf_archive& h5f) + { + bool success = true; + success = success && h5f.writeEntry(spline_radius, "spline_radius"); + success = success && h5f.writeEntry(spline_npoints, "spline_npoints"); + success = success && h5f.writeEntry(lmax, "l_max"); + success = success && h5f.writeEntry(center_pos, "position"); + einspline_engine bigtable(SplineInst->getSplinePtr()); + success = success && h5f.writeEntry(bigtable, "radial_spline"); + return success; + } + + // evaluate only V + template + inline void evaluate_v(const T& r, const PointType& dr, VV& myV) + { + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r); + else + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + constexpr T czero(0); + T* restrict val = myV.data(); + T* restrict local_val = localV.data(); + std::fill(myV.begin(), myV.end(), czero); + + SplineInst->evaluate(r, localV); + + for (size_t lm = 0; lm < lm_tot; lm++) + { +#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + + template + inline void evaluateValues(const DISPL& Displacements, const int center_idx, const T& r, VM& multi_myV) + { + if (r <= std::numeric_limits::epsilon()) + Ylm.evaluateV(0, 0, 1); + const T* restrict Ylm_v = Ylm[0]; + + const size_t m = multi_myV.cols(); + constexpr T czero(0); + std::fill(multi_myV.begin(), multi_myV.end(), czero); + SplineInst->evaluate(r, localV); + + for (int ivp = 0; ivp < Displacements.size(); ivp++) + { + PointType 
dr = Displacements[ivp][center_idx]; + if (r > std::numeric_limits::epsilon()) + Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r); + + T* restrict val = multi_myV[ivp]; + T* restrict local_val = localV.data(); + for (size_t lm = 0; lm < lm_tot; lm++) + { +#pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < m; ib++) + val[ib] += Ylm_v[lm] * local_val[ib]; + local_val += Npad; + } + } + } + + // evaluate VGL + template + inline void evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL) + { + T drx, dry, drz, rhatx, rhaty, rhatz, rinv; + if (r > rmin) + { + rinv = 1.0 / r; + } + else + { + rinv = 0; + } + drx = dr[0]; + dry = dr[1]; + drz = dr[2]; + rhatx = drx * rinv; + rhaty = dry * rinv; + rhatz = drz * rinv; + + Ylm.evaluateVGL(drx, dry, drz); + const T* restrict Ylm_v = Ylm[0]; + const T* restrict Ylm_gx = Ylm[1]; + const T* restrict Ylm_gy = Ylm[2]; + const T* restrict Ylm_gz = Ylm[3]; + + T* restrict g0 = myG.data(0); + T* restrict g1 = myG.data(1); + T* restrict g2 = myG.data(2); + constexpr T czero(0), cone(1), chalf(0.5); + std::fill(myV.begin(), myV.end(), czero); + std::fill(g0, g0 + Npad, czero); + std::fill(g1, g1 + Npad, czero); + std::fill(g2, g2 + Npad, czero); + std::fill(myL.begin(), myL.end(), czero); + T* restrict val = myV.data(); + T* restrict lapl = myL.data(); + T* restrict local_val = localV.data(); + T* restrict local_grad = localG.data(); + T* restrict local_lapl = localL.data(); + + SplineInst->evaluate_vgl(r, localV, localG, localL); + + if (r > rmin_sqrt) + { + // far from core + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) + { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) + { + const T& l_val = l_vals[lm]; + const T& r_power = r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (rhatx * 
Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power; +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_v = local_val[ib]; + const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = l_val * rinv * local_v; + val[ib] += Ylm_rescale * local_v; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -Vpart * Ylm_rescale; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; + g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; + + // laplacian + lapl[ib] += (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * Ylm_rescale + (local_g - Vpart) * rhat_dot_G; + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } + } + else if (r > rmin) + { + // the possibility of reaching here is very very low + std::cout << "Warning: an electron is very close to an ion, distance=" << r << " be careful!" 
<< std::endl; + // near core, kill divergence in the laplacian + r_power_minus_l[0] = cone; + T r_power_temp = cone; + for (int l = 1; l <= lmax; l++) + { + r_power_temp *= rinv; + for (int m = -l, lm = l * l; m <= l; m++, lm++) + r_power_minus_l[lm] = r_power_temp; + } + + for (size_t lm = 0; lm < lm_tot; lm++) + { + const T& l_val = l_vals[lm]; + const T& r_power = r_power_minus_l[lm]; + const T Ylm_rescale = Ylm_v[lm] * r_power; + const T rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r; +#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_v = local_val[ib]; + const T local_g = local_grad[ib]; + const T local_l = local_lapl[ib]; + // value + const T Vpart = Ylm_rescale * local_v; + val[ib] += Vpart; + + // grad + const T factor1 = local_g * Ylm_rescale; + const T factor2 = local_v * r_power; + const T factor3 = -l_val * Vpart * rinv; + g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx; + g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty; + g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz; + + // laplacian + lapl[ib] += local_l * (cone - chalf * l_val) * (3 * Ylm_rescale + rhat_dot_G); + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + } + } + else + { + std::cout << "Warning: an electron is on top of an ion!" 
<< std::endl; + // strictly zero + +#pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + // value + val[ib] = Ylm_v[0] * local_val[ib]; + + // laplacian + lapl[ib] = local_lapl[ib] * static_cast(3) * Ylm_v[0]; + } + local_val += Npad; + local_grad += Npad; + local_lapl += Npad; + if (lm_tot > 0) + { + // std::cout << std::endl; + for (size_t lm = 1; lm < 4; lm++) + { +#pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT) + for (size_t ib = 0; ib < myV.size(); ib++) + { + const T local_g = local_grad[ib]; + // grad + g0[ib] += local_g * Ylm_gx[lm]; + g1[ib] += local_g * Ylm_gy[lm]; + g2[ib] += local_g * Ylm_gz[lm]; + } + local_grad += Npad; + } + } + } + } + + template + void evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH) + { + // Needed to do tensor product here + APP_ABORT("AtomicOrbitals::evaluate_vgh"); + } +}; + +template +class HybridRepCenterOrbitalsT +{ +public: + static const int D = 3; + using PointType = typename AtomicOrbitalsT::PointType; + using RealType = typename DistanceTableT::RealType; + using PosType = typename DistanceTableT::PosType; + +private: + /// atomic centers + std::vector> AtomicCenters; + /// table index + int myTableID; + /// mapping supercell to primitive cell + std::vector Super2Prim; + /// r from distance table + RealType dist_r; + /// dr from distance table + PosType dist_dr; + /// for APBC + PointType r_image; + /// smooth function value + RealType f; + /// smooth function first derivative + RealType df_dr; + /// smooth function second derivative + RealType d2f_dr2; + /// smoothing schemes + enum class smoothing_schemes + { + CONSISTENT = 0, + SMOOTHALL, + SMOOTHPARTIAL + } smooth_scheme; + /// smoothing function + smoothing_functions smooth_func_id; + +public: + HybridRepCenterOrbitalsT() {} + + void set_info(const ParticleSetT& ions, ParticleSetT& els, const std::vector& mapping) + { + myTableID = 
els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST); + Super2Prim = mapping; + } + + inline void resizeStorage(size_t Nb) + { + size_t SplineCoefsBytes = 0; + + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + AtomicCenters[ic].resizeStorage(Nb); + SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes(); + } + + app_log() << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated " + << "for the atomic radial splines in hybrid orbital representation" << std::endl; + } + + void bcast_tables(Communicate* comm) + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].bcast_tables(comm); + } + + void gather_atomic_tables(Communicate* comm, std::vector& offset) + { + if (comm->size() == 1) + return; + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].gather_tables(comm, offset); + } + + inline void flush_zero() + { + for (int ic = 0; ic < AtomicCenters.size(); ic++) + AtomicCenters[ic].flush_zero(); + } + + bool read_splines(hdf_archive& h5f) + { + bool success = true; + size_t ncenter; + + try + { + h5f.push("atomic_centers", false); + } + catch (...) + { + success = false; + } + success = success && h5f.readEntry(ncenter, "number_of_centers"); + if (!success) + return success; + if (ncenter != AtomicCenters.size()) + success = false; + // read splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + std::ostringstream gname; + gname << "center_" << ic; + try + { + h5f.push(gname.str().c_str(), false); + } + catch (...) + { + success = false; + } + success = success && AtomicCenters[ic].read_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + bool write_splines(hdf_archive& h5f) + { + bool success = true; + int ncenter = AtomicCenters.size(); + try + { + h5f.push("atomic_centers", true); + } + catch (...) 
+ { + success = false; + } + success = success && h5f.writeEntry(ncenter, "number_of_centers"); + // write splines of each center + for (int ic = 0; ic < AtomicCenters.size(); ic++) + { + std::ostringstream gname; + gname << "center_" << ic; + try + { + h5f.push(gname.str().c_str(), true); + } + catch (...) + { + success = false; + } + success = success && AtomicCenters[ic].write_splines(h5f); + h5f.pop(); + } + h5f.pop(); + return success; + } + + template + inline int get_bc_sign(const PointType& r, const Cell& PrimLattice, TinyVector& HalfG) + { + int bc_sign = 0; + PointType shift_unit = PrimLattice.toUnit(r - r_image); + for (int i = 0; i < D; i++) + { + ST img = round(shift_unit[i]); + bc_sign += HalfG[i] * (int)img; + } + return bc_sign; + } + + // evaluate only V + template + inline RealType evaluate_v(const ParticleSetT& P, const int iat, VV& myV) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_v(dist_r, dr, myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + /* check if the batched algorithm is safe to operate + * @param VP virtual particle set + * @return true if it is safe + * + * When the reference electron in the NLPP evaluation has a distance larger + * than the non overlapping radius of the reference center. Some qudrature + * points may get its SPOs evaluated from the nearest center which is not + * the reference center. The batched algorthm forces the evaluation on the + * reference center and introduce some error. In this case, the non-batched + * algorithm should be used. 
+ */ + bool is_batched_safe(const VirtualParticleSetT& VP) + { + const int center_idx = VP.refSourcePtcl; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + return VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] < + myCenter.getNonOverlappingRadius(); + } + + // C2C, C2R cases + template + inline RealType evaluateValuesC2X(const VirtualParticleSetT& VP, VM& multi_myV) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, dist_r, multi_myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // R2R case + template + inline RealType evaluateValuesR2R(const VirtualParticleSetT& VP, + const Cell& PrimLattice, + TinyVector& HalfG, + VM& multi_myV, + SV& bc_signs) + { + const int center_idx = VP.refSourcePtcl; + dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx]; + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + const auto& displ = VP.getDistTableAB(myTableID).getDisplacements(); + for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) + { + r_image = myCenter.getCenterPos() - displ[ivp][center_idx]; + bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG); + ; + } + myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGL + template + inline RealType evaluate_vgl(const ParticleSetT& P, const int iat, VV& myV, GV& myG, VV& myL) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + 
if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // evaluate only VGH + template + inline RealType evaluate_vgh(const ParticleSetT& P, const int iat, VV& myV, GV& myG, HT& myH) + { + const auto& ei_dist = P.getDistTableAB(myTableID); + const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat); + if (center_idx < 0) + abort(); + auto& myCenter = AtomicCenters[Super2Prim[center_idx]]; + if (dist_r < myCenter.getCutoff()) + { + PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]); + r_image = myCenter.getCenterPos() + dr; + myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH); + return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r); + } + return RealType(-1); + } + + // interpolate buffer region, value only + template + inline void interpolate_buffer_v(VV& psi, const VV& psi_AO) const + { + const RealType cone(1); + for (size_t i = 0; i < psi.size(); i++) + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + + // interpolate buffer region, value, gradients and laplacian + template + inline void interpolate_buffer_vgl(VV& psi, + GV& dpsi, + VV& d2psi, + const VV& psi_AO, + const GV& dpsi_AO, + const VV& d2psi_AO) const + { + const RealType cone(1), ctwo(2); + const RealType rinv(1.0 / dist_r); + if (smooth_scheme == smoothing_schemes::CONSISTENT) + for (size_t i = 0; i < psi.size(); i++) + { // psi, dpsi, d2psi are all consistent + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) + + (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) + df_dr * rinv * 
dist_dr * (psi[i] - psi_AO[i]); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHALL) + for (size_t i = 0; i < psi.size(); i++) + { + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL) + for (size_t i = 0; i < psi.size(); i++) + { // dpsi, d2psi are consistent but psi is not. + d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr); + dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f); + psi[i] = psi_AO[i] * f + psi[i] * (cone - f); + } + else + throw std::runtime_error("Unknown smooth scheme!"); + } + + inline RealType smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r) + { + const RealType cone(1); + if (r < cutoff_buffer) + return cone; + const RealType scale = cone / (cutoff - cutoff_buffer); + const RealType x = (r - cutoff_buffer) * scale; + f = smoothing(smooth_func_id, x, df_dr, d2f_dr2); + df_dr *= scale; + d2f_dr2 *= scale * scale; + return f; + } + + template + friend class HybridRepSetReaderT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h index 79405ee8aa..aedaee7cc1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h @@ -18,8 +18,8 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_CPLX_H #define QMCPLUSPLUS_HYBRIDREP_CPLX_H -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "CPU/SIMD/inner_product.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h" namespace qmcplusplus { @@ -29,222 +29,7 @@ namespace qmcplusplus * Only works with SPLINEBASE class containing complex splines */ template -class HybridRepCplx : public 
SPLINEBASE, private HybridRepCenterOrbitals -{ -public: - using HYBRIDBASE = HybridRepCenterOrbitals; - using ST = typename SPLINEBASE::DataType; - using PointType = typename SPLINEBASE::PointType; - using SingleSplineType = typename SPLINEBASE::SingleSplineType; - using RealType = typename SPLINEBASE::RealType; - // types for evaluation results - using typename SPLINEBASE::GGGVector; - using typename SPLINEBASE::GradMatrix; - using typename SPLINEBASE::GradType; - using typename SPLINEBASE::GradVector; - using typename SPLINEBASE::HessVector; - using typename SPLINEBASE::OffloadMWVGLArray; - using typename SPLINEBASE::ValueMatrix; - using typename SPLINEBASE::ValueType; - using typename SPLINEBASE::ValueVector; - -private: - using typename HYBRIDBASE::Region; - - ValueVector psi_AO, d2psi_AO; - GradVector dpsi_AO; - Matrix> multi_myV; - typename HYBRIDBASE::LocationSmoothingInfo info; - - using SPLINEBASE::myG; - using SPLINEBASE::myH; - using SPLINEBASE::myL; - using SPLINEBASE::myV; - -public: - HybridRepCplx(const std::string& my_name) : SPLINEBASE(my_name) {} - - std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } - std::string getKeyword() const final { return "Hybrid" + SPLINEBASE::getKeyword(); } - bool isOMPoffload() const final { return false; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - SPLINEBASE::resizeStorage(n, nvals); - HYBRIDBASE::resizeStorage(myV.size()); - } - - void bcast_tables(Communicate* comm) - { - SPLINEBASE::bcast_tables(comm); - HYBRIDBASE::bcast_tables(comm); - } - - void gather_tables(Communicate* comm) - { - SPLINEBASE::gather_tables(comm); - HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); - } - - bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } - - bool write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) 
&& SPLINEBASE::write_splines(h5f); } - - inline void flush_zero() - { - //SPLINEBASE::flush_zero(); - HYBRIDBASE::flush_zero(); - } - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override - { - HYBRIDBASE::evaluate_v(P, iat, myV, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(P, iat, psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_v(P.activeR(iat), myV, psi, 0, myV.size() / 2); - else - { - psi_AO.resize(psi.size()); - SPLINEBASE::assign_v(P.activeR(iat), myV, psi_AO, 0, myV.size() / 2); - SPLINEBASE::evaluateValue(P, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - } - - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - if (VP.isOnSphere()) - { - // resize scratch space - psi_AO.resize(psi.size()); - if (multi_myV.rows() < VP.getTotalNum()) - multi_myV.resize(VP.getTotalNum(), myV.size()); - HYBRIDBASE::evaluateValuesC2X(VP, multi_myV, info); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(VP, iat, psi); - else if (info.region == Region::INSIDE) - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(VP.R[iat], myV_one, psi, 0, myV.size() / 2); - } - else - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(VP.R[iat], myV_one, psi_AO, 0, myV.size() / 2); - SPLINEBASE::evaluateValue(VP, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - else - { - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& 
invRow_ptr_list, - std::vector>& ratios_list) const final - { - BsplineSet::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); - } - - void evaluateVGL(const ParticleSet& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_vgl_from_l(P.activeR(iat), psi, dpsi, d2psi); - else - { - psi_AO.resize(psi.size()); - dpsi_AO.resize(psi.size()); - d2psi_AO.resize(psi.size()); - SPLINEBASE::assign_vgl_from_l(P.activeR(iat), psi_AO, dpsi_AO, d2psi_AO); - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO, info); - } - } - - void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final - { - BsplineSet::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final - { - BsplineSet::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); - } - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluate_vgh not implemented!"); - HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - else - SPLINEBASE::assign_vgh(P.activeR(iat), psi, dpsi, grad_grad_psi, 0, 
myV.size() / 2); - } - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluate_vghgh not implemented!"); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final - { - // bypass SPLINEBASE::evaluate_notranspose - BsplineSet::evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - } - - template - friend class HybridRepSetReader; - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using HybridRepCplx = HybridRepCplxT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h new file mode 100644 index 0000000000..b84b263c9e --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h @@ -0,0 +1,267 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_HYBRIDREP_CPLXT_H +#define QMCPLUSPLUS_HYBRIDREP_CPLXT_H + +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +#include "CPU/SIMD/inner_product.hpp" +namespace qmcplusplus +{ +/** hybrid representation orbitals combining B-spline orbitals on a grid and + * atomic centered orbitals. + * @tparam SPLINEBASE B-spline orbital class. 
+ * + * Only works with SPLINEBASE class containing complex splines + */ +template +class HybridRepCplxT : public SPLINEBASE, + private HybridRepCenterOrbitalsT +{ +public: + using HYBRIDBASE = HybridRepCenterOrbitalsT; + using ST = typename SPLINEBASE::DataType; + using PointType = typename SPLINEBASE::PointType; + using SingleSplineType = typename SPLINEBASE::SingleSplineType; + using RealType = typename SPLINEBASE::RealType; + // types for evaluation results + using typename SPLINEBASE::GGGVector; + using typename SPLINEBASE::GradMatrix; + using typename SPLINEBASE::GradType; + using typename SPLINEBASE::GradVector; + using typename SPLINEBASE::HessVector; + using typename SPLINEBASE::OffloadMWVGLArray; + using typename SPLINEBASE::ValueMatrix; + using typename SPLINEBASE::ValueType; + using typename SPLINEBASE::ValueVector; + +private: + ValueVector psi_AO, d2psi_AO; + GradVector dpsi_AO; + Matrix> multi_myV; + + using SPLINEBASE::HalfG; + using SPLINEBASE::myG; + using SPLINEBASE::myH; + using SPLINEBASE::myL; + using SPLINEBASE::myV; + +public: + HybridRepCplxT(const std::string& my_name) : SPLINEBASE(my_name) {} + + std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } + std::string getKeyword() const final { return "Hybrid" + SPLINEBASE::getKeyword(); } + bool isOMPoffload() const final { return false; } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + SPLINEBASE::resizeStorage(n, nvals); + HYBRIDBASE::resizeStorage(myV.size()); + } + + void bcast_tables(Communicate* comm) + { + SPLINEBASE::bcast_tables(comm); + HYBRIDBASE::bcast_tables(comm); + } + + void gather_tables(Communicate* comm) + { + SPLINEBASE::gather_tables(comm); + HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); + } + + bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } + + bool 
write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); } + + inline void flush_zero() + { + // SPLINEBASE::flush_zero(); + HYBRIDBASE::flush_zero(); + } + + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_v(P, iat, myV); + const RealType cone(1); + if (smooth_factor < 0) + { + SPLINEBASE::evaluateValue(P, iat, psi); + } + else if (smooth_factor == cone) + { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_v(r, myV, psi, 0, myV.size() / 2); + } + else + { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + SPLINEBASE::assign_v(r, myV, psi_AO, 0, myV.size() / 2); + SPLINEBASE::evaluateValue(P, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + } + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override + { + if (VP.isOnSphere()) + { + // resize scratch space + psi_AO.resize(psi.size()); + if (multi_myV.rows() < VP.getTotalNum()) + multi_myV.resize(VP.getTotalNum(), myV.size()); + const RealType smooth_factor = HYBRIDBASE::evaluateValuesC2X(VP, multi_myV); + const RealType cone(1); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + if (smooth_factor < 0) + SPLINEBASE::evaluateValue(VP, iat, psi); + else if (smooth_factor == cone) + { + const PointType& r = VP.R[iat]; + Vector> myV_one(multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(r, myV_one, psi, 0, myV.size() / 2); + } + else + { + const PointType& r = VP.R[iat]; + Vector> myV_one(multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(r, myV_one, psi_AO, 0, myV.size() / 2); + SPLINEBASE::evaluateValue(VP, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + else + { + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + evaluateValue(VP, iat, psi); + 
ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + } + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final + { + BsplineSetT::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); + } + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL); + const RealType cone(1); + if (smooth_factor < 0) + { + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + } + else if (smooth_factor == cone) + { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_vgl_from_l(r, psi, dpsi, d2psi); + } + else + { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + dpsi_AO.resize(psi.size()); + d2psi_AO.resize(psi.size()); + SPLINEBASE::assign_vgl_from_l(r, psi_AO, dpsi_AO, d2psi_AO); + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO); + } + } + + void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final + { + BsplineSetT::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const final + { + BsplineSetT::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, + grads); + } + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& 
dpsi, + HessVector& grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluate_vgh not implemented!"); + if (HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH)) + { + const PointType& r = P.activeR(iat); + SPLINEBASE::assign_vgh(r, psi, dpsi, grad_grad_psi, 0, myV.size() / 2); + } + else + SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluate_vghgh not implemented!"); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final + { + // bypass SPLINEBASE::evaluate_notranspose + BsplineSetT::evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + } + + template + friend class HybridRepSetReaderT; + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h index 3cf6a2065c..89dda48341 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h @@ -18,240 +18,13 @@ #ifndef QMCPLUSPLUS_HYBRIDREP_REAL_H #define QMCPLUSPLUS_HYBRIDREP_REAL_H -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "CPU/SIMD/inner_product.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepRealT.h" namespace qmcplusplus { -/** hybrid representation orbitals combining B-spline orbitals on a grid and atomic centered orbitals. - * @tparam SPLINEBASE B-spline orbital class. 
- * - * Only works with SPLINEBASE class containing real splines - */ template -class HybridRepReal : public SPLINEBASE, private HybridRepCenterOrbitals -{ -public: - using HYBRIDBASE = HybridRepCenterOrbitals; - using ST = typename SPLINEBASE::DataType; - using PointType = typename SPLINEBASE::PointType; - using SingleSplineType = typename SPLINEBASE::SingleSplineType; - using RealType = typename SPLINEBASE::RealType; - // types for evaluation results - using typename SPLINEBASE::GGGVector; - using typename SPLINEBASE::GradMatrix; - using typename SPLINEBASE::GradType; - using typename SPLINEBASE::GradVector; - using typename SPLINEBASE::HessVector; - using typename SPLINEBASE::OffloadMWVGLArray; - using typename SPLINEBASE::ValueMatrix; - using typename SPLINEBASE::ValueType; - using typename SPLINEBASE::ValueVector; - -private: - using typename HYBRIDBASE::Region; - - ValueVector psi_AO, d2psi_AO; - GradVector dpsi_AO; - Matrix> multi_myV; - typename HYBRIDBASE::LocationSmoothingInfo info; - - using SPLINEBASE::HalfG; - using SPLINEBASE::myG; - using SPLINEBASE::myH; - using SPLINEBASE::myL; - using SPLINEBASE::myV; - using SPLINEBASE::PrimLattice; - -public: - HybridRepReal(const std::string& my_name) : SPLINEBASE(my_name) {} - - std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } - std::string getKeyword() const final { return "Hybrid" + SPLINEBASE::getKeyword(); } - bool isOMPoffload() const final { return false; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - SPLINEBASE::resizeStorage(n, nvals); - HYBRIDBASE::resizeStorage(myV.size()); - } - - void bcast_tables(Communicate* comm) - { - SPLINEBASE::bcast_tables(comm); - HYBRIDBASE::bcast_tables(comm); - } - - void gather_tables(Communicate* comm) - { - SPLINEBASE::gather_tables(comm); - HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); - } - - inline void flush_zero() 
- { - //SPLINEBASE::flush_zero(); - HYBRIDBASE::flush_zero(); - } - - bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } - - bool write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); } - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override - { - HYBRIDBASE::evaluate_v(P, iat, myV, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(P, iat, psi); - else if (info.region == Region::INSIDE) - { - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_v(bc_sign, myV, psi, 0, myV.size()); - } - else - { - psi_AO.resize(psi.size()); - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_v(bc_sign, myV, psi_AO, 0, myV.size()); - SPLINEBASE::evaluateValue(P, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, psi_AO, info.f); - } - } - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - if (VP.isOnSphere() && HYBRIDBASE::is_batched_safe(VP)) - { - // resize scratch space - psi_AO.resize(psi.size()); - if (multi_myV.rows() < VP.getTotalNum()) - multi_myV.resize(VP.getTotalNum(), myV.size()); - std::vector bc_signs(VP.getTotalNum()); - HYBRIDBASE::evaluateValuesR2R(VP, PrimLattice, HalfG, multi_myV, bc_signs, info); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - if (info.region == Region::INTER) - SPLINEBASE::evaluateValue(VP, iat, psi); - else if (info.region == Region::INSIDE) - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi, 0, myV.size()); - } - else - { - Vector> myV_one(multi_myV[iat], myV.size()); - SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi_AO, 0, myV.size()); - SPLINEBASE::evaluateValue(VP, iat, psi); - HYBRIDBASE::interpolate_buffer_v(psi, 
psi_AO, info.f); - } - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - else - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } - } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final - { - BsplineSet::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); - } - - void evaluateVGL(const ParticleSet& P, const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL, info); - if (info.region == Region::INTER) - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - else if (info.region == Region::INSIDE) - SPLINEBASE::assign_vgl_from_l(HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG), psi, - dpsi, d2psi); - else - { - psi_AO.resize(psi.size()); - dpsi_AO.resize(psi.size()); - d2psi_AO.resize(psi.size()); - int bc_sign = HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG); - SPLINEBASE::assign_vgl_from_l(bc_sign, psi_AO, dpsi_AO, d2psi_AO); - SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); - HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO, info); - } - } - - void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final - { - BsplineSet::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); - } - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final 
- { - BsplineSet::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); - } - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - APP_ABORT("HybridRepReal::evaluateVGH not implemented!"); - HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH, info); - if (info.region == Region::INTER) - SPLINEBASE::assign_vgh(HYBRIDBASE::get_bc_sign(P.activeR(iat), info.r_image, PrimLattice, HalfG), psi, dpsi, - grad_grad_psi, 0, myV.size()); - else - SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - APP_ABORT("HybridRepCplx::evaluateVGHGH not implemented!"); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final - { - // bypass SPLINEBASE::evaluate_notranspose - BsplineSet::evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - } - - template - friend class HybridRepSetReader; - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; +using HybridRepReal = HybridRepRealT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h new file mode 100644 index 0000000000..40c9b2e7cc --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h @@ -0,0 +1,277 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file HybridRepRealT.h + * + * hold HybridRepRealT + */ +#ifndef QMCPLUSPLUS_HYBRIDREP_REALT_H +#define QMCPLUSPLUS_HYBRIDREP_REALT_H + +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +#include "CPU/SIMD/inner_product.hpp" + +namespace qmcplusplus +{ +/** hybrid representation orbitals combining B-spline orbitals on a grid and + * atomic centered orbitals. + * @tparam SPLINEBASE B-spline orbital class. + * + * Only works with SPLINEBASE class containing real splines + */ +template +class HybridRepRealT : public SPLINEBASE, + private HybridRepCenterOrbitalsT +{ +public: + using HYBRIDBASE = HybridRepCenterOrbitalsT; + using ST = typename SPLINEBASE::DataType; + using PointType = typename SPLINEBASE::PointType; + using SingleSplineType = typename SPLINEBASE::SingleSplineType; + using RealType = typename SPLINEBASE::RealType; + // types for evaluation results + using typename SPLINEBASE::GGGVector; + using typename SPLINEBASE::GradMatrix; + using typename SPLINEBASE::GradType; + using typename SPLINEBASE::GradVector; + using typename SPLINEBASE::HessVector; + using typename SPLINEBASE::OffloadMWVGLArray; + using typename SPLINEBASE::ValueMatrix; + using typename SPLINEBASE::ValueType; + using typename SPLINEBASE::ValueVector; + +private: + ValueVector psi_AO, d2psi_AO; + GradVector dpsi_AO; + Matrix> multi_myV; + + using SPLINEBASE::HalfG; + using SPLINEBASE::myG; + using SPLINEBASE::myH; + using SPLINEBASE::myL; + using SPLINEBASE::myV; + using SPLINEBASE::PrimLattice; + +public: + HybridRepRealT(const std::string& my_name) : SPLINEBASE(my_name) {} + + std::string getClassName() const final { return "Hybrid" + SPLINEBASE::getClassName(); } + std::string getKeyword() const final { return "Hybrid" + 
SPLINEBASE::getKeyword(); } + bool isOMPoffload() const final { return false; } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + SPLINEBASE::resizeStorage(n, nvals); + HYBRIDBASE::resizeStorage(myV.size()); + } + + void bcast_tables(Communicate* comm) + { + SPLINEBASE::bcast_tables(comm); + HYBRIDBASE::bcast_tables(comm); + } + + void gather_tables(Communicate* comm) + { + SPLINEBASE::gather_tables(comm); + HYBRIDBASE::gather_atomic_tables(comm, SPLINEBASE::offset); + } + + inline void flush_zero() + { + // SPLINEBASE::flush_zero(); + HYBRIDBASE::flush_zero(); + } + + bool read_splines(hdf_archive& h5f) { return HYBRIDBASE::read_splines(h5f) && SPLINEBASE::read_splines(h5f); } + + bool write_splines(hdf_archive& h5f) { return HYBRIDBASE::write_splines(h5f) && SPLINEBASE::write_splines(h5f); } + + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_v(P, iat, myV); + const RealType cone(1); + if (smooth_factor < 0) + { + SPLINEBASE::evaluateValue(P, iat, psi); + } + else if (smooth_factor == cone) + { + const PointType& r = P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_v(bc_sign, myV, psi, 0, myV.size()); + } + else + { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_v(bc_sign, myV, psi_AO, 0, myV.size()); + SPLINEBASE::evaluateValue(P, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + } + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override + { + if (VP.isOnSphere() && HYBRIDBASE::is_batched_safe(VP)) + { + // resize scratch space + psi_AO.resize(psi.size()); + if (multi_myV.rows() < VP.getTotalNum()) + 
multi_myV.resize(VP.getTotalNum(), myV.size()); + std::vector bc_signs(VP.getTotalNum()); + const RealType smooth_factor = HYBRIDBASE::evaluateValuesR2R(VP, PrimLattice, HalfG, multi_myV, bc_signs); + const RealType cone(1); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + if (smooth_factor < 0) + SPLINEBASE::evaluateValue(VP, iat, psi); + else if (smooth_factor == cone) + { + Vector> myV_one(multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi, 0, myV.size()); + } + else + { + Vector> myV_one(multi_myV[iat], myV.size()); + SPLINEBASE::assign_v(bc_signs[iat], myV_one, psi_AO, 0, myV.size()); + SPLINEBASE::evaluateValue(VP, iat, psi); + HYBRIDBASE::interpolate_buffer_v(psi, psi_AO); + } + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + else + { + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + evaluateValue(VP, iat, psi); + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } + } + } + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final + { + BsplineSetT::mw_evaluateDetRatios(spo_list, vp_list, psi_list, invRow_ptr_list, ratios_list); + } + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override + { + const RealType smooth_factor = HYBRIDBASE::evaluate_vgl(P, iat, myV, myG, myL); + const RealType cone(1); + if (smooth_factor < 0) + { + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + } + else if (smooth_factor == cone) + { + const PointType& r = P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgl_from_l(bc_sign, psi, dpsi, d2psi); + } + else + { + const PointType& r = P.activeR(iat); + psi_AO.resize(psi.size()); + dpsi_AO.resize(psi.size()); + d2psi_AO.resize(psi.size()); + int bc_sign = 
HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgl_from_l(bc_sign, psi_AO, dpsi_AO, d2psi_AO); + SPLINEBASE::evaluateVGL(P, iat, psi, dpsi, d2psi); + HYBRIDBASE::interpolate_buffer_vgl(psi, dpsi, d2psi, psi_AO, dpsi_AO, d2psi_AO); + } + } + + void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final + { + BsplineSetT::mw_evaluateVGL(sa_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); + } + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const final + { + BsplineSetT::mw_evaluateVGLandDetRatioGrads(spo_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, + grads); + } + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override + { + APP_ABORT("HybridRepReal::evaluateVGH not implemented!"); + if (HYBRIDBASE::evaluate_vgh(P, iat, myV, myG, myH)) + { + const PointType& r = P.activeR(iat); + int bc_sign = HYBRIDBASE::get_bc_sign(r, PrimLattice, HalfG); + SPLINEBASE::assign_vgh(bc_sign, psi, dpsi, grad_grad_psi, 0, myV.size()); + } + else + SPLINEBASE::evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + APP_ABORT("HybridRepCplx::evaluateVGHGH not implemented!"); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final + { + // bypass SPLINEBASE::evaluate_notranspose + BsplineSetT::evaluate_notranspose(P, first, last, logdet, dlogdet, 
d2logdet); + } + + template + friend class HybridRepSetReaderT; + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h similarity index 88% rename from src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h rename to src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h index 1e25e2ae11..73916e394e 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h @@ -9,21 +9,16 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#ifndef QMCPLUSPLUS_HYBRIDREP_READERT_H +#define QMCPLUSPLUS_HYBRIDREP_READERT_H -/** @file - * - * derived from SplineSetReader - */ - -#ifndef QMCPLUSPLUS_HYBRIDREP_READER_H -#define QMCPLUSPLUS_HYBRIDREP_READER_H - -#include "Numerics/Quadrature.h" -#include "Numerics/Bessel.h" -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "OhmmsData/AttributeSet.h" #include "CPU/math.hpp" #include "Concurrency/OpenMP.h" +#include "Numerics/Bessel.h" +#include "Numerics/Quadrature.h" +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" namespace qmcplusplus { @@ -132,22 +127,22 @@ struct Gvectors } }; - /** General HybridRepSetReader to handle any unitcell */ template -class HybridRepSetReader : public SplineSetReader +class HybridRepSetReaderT : public SplineSetReaderT { public: - using BaseReader = SplineSetReader; + using BaseReader = SplineSetReaderT; using BaseReader::bspline; using BaseReader::mybuilder; using BaseReader::rotate_phase_i; using BaseReader::rotate_phase_r; using typename BaseReader::DataType; + using 
typename BaseReader::ValueType; - HybridRepSetReader(EinsplineSetBuilder* e) : BaseReader(e) {} + HybridRepSetReaderT(EinsplineSetBuilderT* e) : BaseReader(e) {} /** initialize basic parameters of atomic orbitals */ void initialize_hybridrep_atomic_centers() override @@ -160,13 +155,14 @@ class HybridRepSetReader : public SplineSetReader a.put(mybuilder->XMLRoot); // assign smooth_scheme if (scheme_name == "Consistent") - bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; + this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT; else if (scheme_name == "SmoothAll") bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL; else if (scheme_name == "SmoothPartial") bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; else - APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme name! Only allows Consistent, SmoothAll or " + APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme " + "name! Only allows Consistent, SmoothAll or " "SmoothPartial."); // assign smooth_function @@ -177,8 +173,8 @@ class HybridRepSetReader : public SplineSetReader else if (s_function_name == "linear") bspline->smooth_func_id = smoothing_functions::LINEAR; else - APP_ABORT( - "initialize_hybridrep_atomic_centers wrong smoothing_function name! Only allows LEKS2018, coscos or linear."); + APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_function " + "name! Only allows LEKS2018, coscos or linear."); app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name << " smoothing function." 
<< std::endl; @@ -195,24 +191,28 @@ class HybridRepSetReader : public SplineSetReader const int my_GroupID = ACInfo.GroupID[center_idx]; if (ACInfo.cutoff[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs parameter 'cutoff_radius' for atom " << center_idx - << std::endl; + app_error() << "Hybrid orbital representation needs " + "parameter 'cutoff_radius' for atom " + << center_idx << std::endl; success = false; } if (ACInfo.inner_cutoff[center_idx] < 0) { const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); - app_log() << "Hybrid orbital representation setting 'inner_cutoff' to " << inner_cutoff << " for group " - << my_GroupID << " as atom " << center_idx << std::endl; - // overwrite the inner_cutoff of all the atoms of the same species + app_log() << "Hybrid orbital representation setting " + "'inner_cutoff' to " + << inner_cutoff << " for group " << my_GroupID << " as atom " << center_idx << std::endl; + // overwrite the inner_cutoff of all the atoms of the same + // species for (int id = 0; id < ACInfo.Ncenters; id++) if (my_GroupID == ACInfo.GroupID[id]) ACInfo.inner_cutoff[id] = inner_cutoff; } else if (ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx]) { - app_error() << "Hybrid orbital representation 'inner_cutoff' must be smaller than 'spline_radius' for atom " + app_error() << "Hybrid orbital representation 'inner_cutoff' must " + "be smaller than 'spline_radius' for atom " << center_idx << std::endl; success = false; } @@ -221,14 +221,17 @@ class HybridRepSetReader : public SplineSetReader { if (ACInfo.lmax[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs parameter 'lmax' for atom " << center_idx << std::endl; + app_error() << "Hybrid orbital representation needs " + "parameter 'lmax' for atom " + << center_idx << std::endl; success = false; } if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0) { - app_log() << "Parameters 'spline_radius' and 
'spline_npoints' for group " << my_GroupID << " as atom " - << center_idx << " are not specified." << std::endl; + app_log() << "Parameters 'spline_radius' and " + "'spline_npoints' for group " + << my_GroupID << " as atom " << center_idx << " are not specified." << std::endl; const double delta = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3; for (int id = 0; id < ACInfo.Ncenters; id++) @@ -245,15 +248,17 @@ class HybridRepSetReader : public SplineSetReader { if (ACInfo.spline_radius[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs parameter 'spline_radius' for atom " << center_idx - << std::endl; + app_error() << "Hybrid orbital representation needs " + "parameter 'spline_radius' for atom " + << center_idx << std::endl; success = false; } if (ACInfo.spline_npoints[center_idx] < 0) { - app_error() << "Hybrid orbital representation needs parameter 'spline_npoints' for atom " << center_idx - << std::endl; + app_error() << "Hybrid orbital representation needs " + "parameter 'spline_npoints' for atom " + << center_idx << std::endl; success = false; } } @@ -263,9 +268,13 @@ class HybridRepSetReader : public SplineSetReader 2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1); if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff) { - app_error() << "Hybrid orbital representation requires cutoff_radius<=" << max_allowed_cutoff - << " calculated by spline_radius-2*spline_radius/(spline_npoints-1) for atom " << center_idx - << std::endl; + app_error() << "Hybrid orbital representation requires " + "cutoff_radius<=" + << max_allowed_cutoff + << " calculated by " + "spline_radius-2*spline_radius/" + "(spline_npoints-1) for atom " + << center_idx << std::endl; success = false; } } @@ -278,12 +287,13 @@ class HybridRepSetReader : public SplineSetReader } } if (!success) - 
BaseReader::myComm->barrier_and_abort("initialize_hybridrep_atomic_centers Failed to initialize atomic centers " + BaseReader::myComm->barrier_and_abort("initialize_hybridrep_atomic_centers Failed to initialize " + "atomic centers " "in hybrid orbital representation!"); for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) { - AtomicOrbitals oneCenter(ACInfo.lmax[center_idx]); + AtomicOrbitalsT oneCenter(ACInfo.lmax[center_idx]); oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx], ACInfo.spline_npoints[center_idx]); @@ -300,7 +310,7 @@ class HybridRepSetReader : public SplineSetReader band_group_comm.bcast(rotate_phase_r); band_group_comm.bcast(rotate_phase_i); band_group_comm.bcast(cG); - //distribute G-vectors over processor groups + // distribute G-vectors over processor groups const int Ngvecs = mybuilder->Gvecs[0].size(); const int Nprocs = band_group_comm.size(); const int Ngvecgroups = std::min(Ngvecs, Nprocs); @@ -311,12 +321,13 @@ class HybridRepSetReader : public SplineSetReader const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; // prepare Gvecs Ylm(G) - using UnitCellType = typename EinsplineSetBuilder::UnitCellType; + using UnitCellType = typename EinsplineSetBuilderT::UnitCellType; Gvectors Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline->HalfG, gvec_first, gvec_last); - // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; + // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << + // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; - std::vector>& centers = bspline->AtomicCenters; + std::vector>& centers = bspline->AtomicCenters; app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; // collect atomic centers by group std::vector uniq_species; @@ -431,7 +442,8 @@ class 
HybridRepSetReader : public SplineSetReader for (size_t ig = ig_first; ig < ig_last; ig++) { const size_t ig_local = ig - ig_first; - // calculate phase shift for all the centers of this group + // calculate phase shift for all the centers of this + // group Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]); Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); } @@ -520,7 +532,8 @@ class HybridRepSetReader : public SplineSetReader } } } - //app_log() << "Building band " << iorb << " at center " << center_idx << std::endl; + // app_log() << "Building band " << iorb << " at center " << + // center_idx << std::endl; for (size_t idx = 0; idx < natoms; idx++) { diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h index 9410e80cfb..54528af444 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h @@ -18,212 +18,13 @@ #ifndef QMCPLUSPLUS_SPLINE_C2C_H #define QMCPLUSPLUS_SPLINE_C2C_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2CT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. 
- */ -template -class SplineC2C : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - - // types for evaluation results - using ComplexT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2C(const std::string& my_name) : BsplineSet(my_name) {} - - SplineC2C(const SplineC2C& in); - virtual std::string getClassName() const override { return "SplineC2C"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() const override { return true; }; - - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - bool isRotationSupported() const override { return true; } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. 
- Should be called by RotatedSPOs::apply_rotation() - This implementation requires that NSPOs > Nelec. In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - Some results (using older Berkeley branch) were published in [3]. - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& 
h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2C; -extern template class SplineC2C; +template +using SplineC2C = SplineC2CT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h index 774c646118..92b29539ef 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h +++ 
b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.h @@ -18,299 +18,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H #define QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Utilities/FairDivide.h" -#include "Utilities/TimerManager.h" -#include -#include "SplineOMPTargetMultiWalkerMem.h" +#include "SplineC2COMPTargetT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (complex) SPOs with OpenMP offload - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * All the output orbitals are complex. - */ -template -class SplineC2COMPTarget : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using ComplexT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; - -private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2COMPTarget(const std::string& my_name) - : BsplineSet(my_name), - offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), - GGt_offload(std::make_shared>(9)), - PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2COMPTarget(const SplineC2COMPTarget& in); - - virtual std::string getClassName() const override { return "SplineC2COMPTarget"; } - virtual std::string getKeyword() const override { return "SplineC2C"; } - bool isComplex() const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { 
- assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] *= 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - /// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") - { - 
MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (uint32_t i = 0; i < 9; i++) - { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - const size_t nk = kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; ++i) - { - (*mKK)[i] = -dot(kPoints[i], kPoints[i]); - (*myKcart)(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const 
ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2COMPTarget; -extern template class SplineC2COMPTarget; +template +using SplineC2COMPTarget = SplineC2COMPTargetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp similarity index 86% rename from src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp index 
2db3525864..bc6a40b0f0 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTarget.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp @@ -9,58 +9,58 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2COMPTargetT.h" -#include "SplineC2COMPTarget.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/OMPTarget/ompReductionComplex.hpp" #include "ApplyPhaseC2C.hpp" #include "Concurrency/OpenMP.h" +#include "Platforms/OMPTarget/ompReductionComplex.hpp" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" +#include "spline2/MultiBsplineEval_OMPoffload.hpp" namespace qmcplusplus { -template -SplineC2COMPTarget::SplineC2COMPTarget(const SplineC2COMPTarget& in) = default; - -template -inline void SplineC2COMPTarget::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2COMPTargetT::SplineC2COMPTargetT(const SplineC2COMPTargetT& in) = default; + +template +inline void SplineC2COMPTargetT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { SplineInst->copy_spline(spline_r, 2 * ispline); SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2COMPTarget::read_splines(hdf_archive& h5f) +template +bool SplineC2COMPTargetT::read_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2COMPTarget::write_splines(hdf_archive& h5f) +template +bool SplineC2COMPTargetT::write_splines(hdf_archive& 
h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void SplineC2COMPTarget::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void SplineC2COMPTargetT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST x = r[0], y = r[1], z = r[2]; const ST* restrict kx = myKcart->data(0); @@ -73,12 +73,12 @@ inline void SplineC2COMPTarget::assign_v(const PointType& r, const ST val_r = myV[2 * j]; const ST val_i = myV[2 * j + 1]; omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); + psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); } } -template -void SplineC2COMPTarget::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void SplineC2COMPTargetT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -86,7 +86,8 @@ void SplineC2COMPTarget::evaluateValue(const ParticleSet& P, const int iat, #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); @@ -94,11 +95,11 @@ void SplineC2COMPTarget::evaluateValue(const 
ParticleSet& P, const int iat, } } -template -void SplineC2COMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void SplineC2COMPTargetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { const int nVP = VP.getTotalNum(); psiinv_pos_copy.resize(psiinv.size() + nVP * 3); @@ -136,7 +137,7 @@ void SplineC2COMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, auto* myKcart_ptr = myKcart->data(); auto* psiinv_ptr = psiinv_pos_copy.data(); auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; { ScopedTimer offload(offload_timer_); @@ -186,15 +187,16 @@ void SplineC2COMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, } } -template -void SplineC2COMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void SplineC2COMPTargetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; auto& mw_ratios_private = mw_mem.mw_ratios_private; @@ -204,7 +206,7 @@ void SplineC2COMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& VP : vp_list) mw_nVP += VP.getTotalNum(); const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int)); @@ -222,7 +224,7 @@ void SplineC2COMPTarget::mw_evaluateDetRatios(const 
RefVectorWithLeader& VP = vp_list[iw]; assert(ratios_list[iw].size() == VP.getTotalNum()); for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { @@ -254,7 +256,7 @@ void SplineC2COMPTarget::mw_evaluateDetRatios(const RefVectorWithLeaderdata(); auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; { ScopedTimer offload(offload_timer_); @@ -311,13 +313,14 @@ void SplineC2COMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader -inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void SplineC2COMPTargetT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { constexpr ST two(2); const ST x = r[0], y = r[1], z = r[2]; @@ -330,7 +333,7 @@ inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, const ST* restrict g1 = myG.data(1); const ST* restrict g2 = myG.data(2); - const size_t N = last_spo - first_spo; + const size_t N = this->last_spo - this->first_spo; #pragma omp simd for (size_t j = 0; j < N; ++j) { @@ -343,11 +346,11 @@ inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = g1[jr]; const ST dZ_r = g2[jr]; @@ -367,7 +370,7 @@ inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + 
first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); @@ -376,12 +379,12 @@ inline void SplineC2COMPTarget::assign_vgl_from_l(const PointType& r, } } -template -void SplineC2COMPTarget::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SplineC2COMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -406,7 +409,7 @@ void SplineC2COMPTarget::evaluateVGL(const ParticleSet& P, auto* GGt_ptr = GGt_offload->data(); auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; { ScopedTimer offload(offload_timer_); @@ -461,13 +464,14 @@ void SplineC2COMPTarget::evaluateVGL(const ParticleSet& P, } } -template -void SplineC2COMPTarget::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2COMPTargetT::evaluateVGLMultiPos( + const Vector>& multi_pos, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { const size_t num_pos = psi_v_list.size(); const size_t ChunkSizePerTeam = 512; @@ -488,7 +492,7 @@ void SplineC2COMPTarget::evaluateVGLMultiPos(const Vectordata(); auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local 
= this->first_spo; { ScopedTimer offload(offload_timer_); @@ -557,16 +561,16 @@ void SplineC2COMPTarget::evaluateVGLMultiPos(const Vector -void SplineC2COMPTarget::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2COMPTargetT::mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.getCastedLeader>(); + auto& phi_leader = sa_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& mw_pos_copy = mw_mem.mw_pos_copy; auto& mw_offload_scratch = mw_mem.mw_offload_scratch; @@ -591,17 +595,17 @@ void SplineC2COMPTarget::mw_evaluateVGL(const RefVectorWithLeader& s d2psi_v_list); } -template -void SplineC2COMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const +template +void SplineC2COMPTargetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& buffer_H2D = mw_mem.buffer_H2D; auto& rg_private = mw_mem.rg_private; @@ -652,7 +656,7 @@ void SplineC2COMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL auto* phi_vgl_ptr = phi_vgl_v.data(); auto* rg_private_ptr = 
rg_private.data(); const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t phi_vgl_stride = num_pos * orb_size; { @@ -719,8 +723,9 @@ void SplineC2COMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride; ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (int j = first_cplx; j < last_cplx; j++) + PRAGMA_OFFLOAD("omp parallel for \ + reduction(+: ratio, grad_x, grad_y, grad_z)") + for (size_t j = first_cplx; j < last_cplx; j++) { const size_t psiIndex = first_spo_local + j; @@ -760,16 +765,17 @@ void SplineC2COMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio}; } } -template -void SplineC2COMPTarget::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const + +template +void SplineC2COMPTargetT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -802,11 +808,11 @@ void SplineC2COMPTarget::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -823,7 +829,7 @@ void SplineC2COMPTarget::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); @@ -879,12 +885,12 @@ void SplineC2COMPTarget::assign_vgh(const PointType& r, } } -template -void SplineC2COMPTarget::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineC2COMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -892,7 +898,8 @@ void SplineC2COMPTarget::evaluateVGH(const ParticleSet& P, #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), 
omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); @@ -900,17 +907,17 @@ void SplineC2COMPTarget::evaluateVGH(const ParticleSet& P, } } -template -void SplineC2COMPTarget::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void SplineC2COMPTargetT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -942,7 +949,7 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, const ST* restrict gh122 = mygH.data(8); const ST* restrict gh222 = mygH.data(9); -//SIMD doesn't work quite right yet. Comment out until further debugging. +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd for (size_t j = first; j < last; ++j) { @@ -955,11 +962,11 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -976,13 +983,14 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -1021,8 +1029,10 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. 
x, _xyz, a derivative with resepect to x,y, and z, and so on. + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -1066,7 +1076,8 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -1122,7 +1133,6 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); @@ -1135,13 +1145,13 @@ void SplineC2COMPTarget::assign_vghgh(const PointType& r, } } -template -void SplineC2COMPTarget::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - 
GGGVector& grad_grad_grad_psi) +template +void SplineC2COMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1155,13 +1165,13 @@ void SplineC2COMPTarget::evaluateVGHGH(const ParticleSet& P, } } -template -void SplineC2COMPTarget::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SplineC2COMPTargetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { // chunk the [first, last) loop into blocks to save temporary memory usage const int block_size = 16; @@ -1217,7 +1227,9 @@ void SplineC2COMPTarget::evaluate_notranspose(const ParticleSet& P, } } -template class SplineC2COMPTarget; -template class SplineC2COMPTarget; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; +template class SplineC2COMPTargetT>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h new file mode 100644 index 0000000000..3e07b734ae --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h @@ -0,0 +1,329 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file SplineC2COMPTargetT.h + * + * class to handle complex splines to complex orbitals with splines of arbitrary + * precision; spline storage and computation are offloaded to accelerators using + * OpenMP target + */ +#ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGETT_H +#define QMCPLUSPLUS_SPLINE_C2C_OMPTARGETT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "SplineOMPTargetMultiWalkerMem.h" +#include "Utilities/FairDivide.h" +#include "Utilities/TimerManager.h" +#include "spline2/MultiBspline.hpp" +#include + +#include + +namespace qmcplusplus +{ +/** class to match std::complex spline with BsplineSet::ValueType (complex) + * SPOs with OpenMP offload + * @tparam ST precision of spline + * + * Requires temporary storage and multiplication of phase vectors + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. All the output orbitals are complex. 
+ */ +template +class SplineC2COMPTargetT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename BsplineSetT::OffloadMWVGLArray; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + template + using OffloadVector = Vector>; + template + using OffloadPosVector = VectorSoaContainer>; + +private: + /// timer for offload portion + NewTimer& offload_timer_; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr, OffloadAllocator>> SplineInst; + + std::shared_ptr> mKK; + std::shared_ptr> myKcart; + std::shared_ptr> GGt_offload; + std::shared_ptr> PrimLattice_G_offload; + + ResourceHandle> mw_mem_handle_; + + /// team private ratios for reduction, numVP x numTeams + Matrix> ratios_private; + /// offload scratch space, dynamically resized to the maximal need + Vector> offload_scratch; + /// result scratch space, dynamically resized to the maximal need + Vector> results_scratch; + /// psiinv and position scratch space, used to avoid allocation on the fly + /// and faster transfer + Vector> psiinv_pos_copy; + /// position scratch space, used to avoid allocation on the fly and faster + /// transfer + Vector> multi_pos_copy; + + void evaluateVGLMultiPos(const Vector>& multi_pos_copy, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; + +protected: + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineC2COMPTargetT(const std::string& my_name) + : BsplineSetT(my_name), + offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)), + GGt_offload(std::make_shared>(9)), + PrimLattice_G_offload(std::make_shared>(9)) + {} + + SplineC2COMPTargetT(const SplineC2COMPTargetT& in); + + virtual std::string getClassName() const override { return "SplineC2COMPTarget"; } + virtual std::string getKeyword() const override { return "SplineC2C"; } + bool isComplex() const override { return true; }; + virtual bool isOMPoffload() const override { return true; } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique>()); + } + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& 
spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + phi_leader.mw_mem_handle_ = collection.lendResource>(); + } + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + collection.takebackResource(phi_leader.mw_mem_handle_); + } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared, OffloadAllocator>>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + /// this routine can not be called from threaded region + void finalizeConstruction() override + { + // map the SplineInst->getSplinePtr() structure to GPU + auto* MultiSpline = SplineInst->getSplinePtr(); + auto* restrict coefs = MultiSpline->coefs; + // attach pointers on the device to achieve deep copy + PRAGMA_OFFLOAD("omp 
target \ + map(always, to: MultiSpline[0:1], \ + coefs[0:MultiSpline->coefs_size])") + { + MultiSpline->coefs = coefs; + } + + // transfer static data to GPU + auto* mKK_ptr = mKK->data(); + PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") + auto* myKcart_ptr = myKcart->data(); + PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") + for (size_t i = 0; i < 9; i++) + { + (*GGt_offload)[i] = GGt[i]; + (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; + } + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") + auto* GGt_ptr = GGt_offload->data(); + PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK = std::make_shared>(nk); + myKcart = std::make_shared>(nk); + for (size_t i = 0; i < nk; ++i) + { + (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); + (*myKcart)(i) = this->kPoints[i]; + } + } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + virtual void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; + + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void 
assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + virtual void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + virtual void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + virtual void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + virtual void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp similarity index 83% rename from src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp index ee7623188e..1d493d4bd0 100644 --- 
a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp @@ -10,52 +10,53 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2CT.h" -#include -#include "Concurrency/OpenMP.h" -#include "SplineC2C.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "CPU/BLAS.hpp" #include "CPU/math.hpp" #include "CPU/SIMD/inner_product.hpp" -#include "CPU/BLAS.hpp" +#include "Concurrency/OpenMP.h" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" + +#include namespace qmcplusplus { -template -SplineC2C::SplineC2C(const SplineC2C& in) = default; - -template -inline void SplineC2C::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2CT::SplineC2CT(const SplineC2CT& in) = default; + +template +inline void SplineC2CT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { SplineInst->copy_spline(spline_r, 2 * ispline); SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2C::read_splines(hdf_archive& h5f) +template +bool SplineC2CT::read_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2C::write_splines(hdf_archive& h5f) +template +bool SplineC2CT::write_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -void 
SplineC2C::storeParamsBeforeRotation() +template +void SplineC2CT::storeParamsBeforeRotation() { const auto spline_ptr = SplineInst->getSplinePtr(); const auto coefs_tot_size = spline_ptr->coefs_size; @@ -103,8 +104,8 @@ void SplineC2C::storeParamsBeforeRotation() NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix" is very tall and skinny. */ -template -void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +template +void SplineC2CT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) { // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp const auto spline_ptr = SplineInst->getSplinePtr(); @@ -113,8 +114,8 @@ void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co const auto Nsplines = spline_ptr->num_splines; // May include padding const auto coefs_tot_size = spline_ptr->coefs_size; const auto basis_set_size = coefs_tot_size / Nsplines; - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); + assert(this->OrbitalSetSize == rot_mat.rows()); + assert(this->OrbitalSetSize == rot_mat.cols()); if (!use_stored_copy) { @@ -124,26 +125,28 @@ void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co if constexpr (std::is_same_v) { - //if ST is double, go ahead and use blas to make things faster - //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals. - //Also casting them as ValueType so they are complex to do the correct gemm - BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(), - OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0), + // if ST is double, go ahead and use blas to make things faster + // Note that Nsplines needs to be divided by 2 since spl_coefs and + // coef_copy_ are stored as reals. 
Also casting them as ValueType so + // they are complex to do the correct gemm + BLAS::gemm('N', 'N', this->OrbitalSetSize, basis_set_size, this->OrbitalSetSize, ValueType(1.0, 0.0), + rot_mat.data(), this->OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0), (ValueType*)spl_coefs, Nsplines / 2); } else { - // if ST is float, RealType is double and ValueType is std::complex for C2C - // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix + // if ST is float, RealType is double and ValueType is + // std::complex for C2C. Just use naive matrix multiplication in + // order to avoid losing precision on rotation matrix for (IndexType i = 0; i < basis_set_size; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) + for (IndexType j = 0; j < this->OrbitalSetSize; j++) { // cur_elem points to the real componend of the coefficient. // Imag component is adjacent in memory. const auto cur_elem = Nsplines * i + 2 * j; ST newval_r{0.}; ST newval_i{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) + for (IndexType k = 0; k < this->OrbitalSetSize; k++) { const auto index = Nsplines * i + 2 * k; ST zr = (*coef_copy_)[index]; @@ -159,15 +162,15 @@ void SplineC2C::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co } } -template -inline void SplineC2C::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void SplineC2CT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; const ST x = r[0], y = r[1], z = r[2]; const ST* restrict kx = myKcart.data(0); @@ -180,12 +183,12 @@ inline void SplineC2C::assign_v(const PointType& r, const ST val_r = myV[2 * j]; const ST val_i = myV[2 * j + 1]; qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c); - psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); + psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s); } } -template -void SplineC2C::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void SplineC2CT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -193,7 +196,8 @@ void SplineC2C::evaluateValue(const ParticleSet& P, const int iat, ValueVect #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last); @@ -201,11 +205,11 @@ void SplineC2C::evaluateValue(const ParticleSet& P, const int iat, ValueVect } } -template -void SplineC2C::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void SplineC2CT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { const bool need_resize = ratios_private.rows() < VP.getTotalNum(); @@ -215,15 +219,17 @@ void SplineC2C::evaluateDetRatios(const VirtualParticleSet& VP, // initialize thread private ratios if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime + if (tid == 0) // just like #pragma 
omp master, but one fewer call to + // the runtime ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier } int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; + const int last_cplx = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; for (int iat = 0; iat < VP.getTotalNum(); ++iat) { @@ -246,17 +252,17 @@ void SplineC2C::evaluateDetRatios(const VirtualParticleSet& VP, } /** assign_vgl - */ -template -inline void SplineC2C::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const + */ +template +inline void SplineC2CT::assign_vgl(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; constexpr ST zero(0); constexpr ST two(2); @@ -292,11 +298,11 @@ inline void SplineC2C::assign_vgl(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -317,7 +323,7 @@ inline void SplineC2C::assign_vgl(const PointType& r, const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG); const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); @@ -326,10 +332,14 @@ inline void SplineC2C::assign_vgl(const PointType& r, } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void SplineC2CT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { constexpr ST two(2); const ST x = r[0], y = r[1], z = r[2]; @@ -342,7 +352,7 @@ inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST* restrict g1 = myG.data(1); const ST* restrict g2 = myG.data(2); - const size_t N = last_spo - first_spo; + const size_t N = 
this->last_spo - this->first_spo; #pragma omp simd for (size_t j = 0; j < N; ++j) { @@ -355,11 +365,11 @@ inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = g1[jr]; const ST dZ_r = g2[jr]; @@ -379,7 +389,7 @@ inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); @@ -388,12 +398,12 @@ inline void SplineC2C::assign_vgl_from_l(const PointType& r, ValueVector& ps } } -template -void SplineC2C::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SplineC2CT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -401,7 +411,8 @@ void SplineC2C::evaluateVGL(const ParticleSet& P, #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); @@ 
-409,16 +420,16 @@ void SplineC2C::evaluateVGL(const ParticleSet& P, } } -template -void SplineC2C::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void SplineC2CT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -451,11 +462,11 @@ void SplineC2C::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -472,7 +483,7 @@ void SplineC2C::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); @@ -528,12 +539,12 @@ void SplineC2C::assign_vgh(const PointType& r, } } -template -void SplineC2C::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineC2CT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { 
const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -541,7 +552,8 @@ void SplineC2C::evaluateVGH(const ParticleSet& P, #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last); @@ -549,17 +561,17 @@ void SplineC2C::evaluateVGH(const ParticleSet& P, } } -template -void SplineC2C::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void SplineC2CT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -591,7 +603,7 @@ void SplineC2C::assign_vghgh(const PointType& r, const ST* restrict gh122 = mygH.data(8); const ST* restrict gh222 = mygH.data(9); -//SIMD doesn't work quite right yet. Comment out until further debugging. +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd for (size_t j = first; j < last; ++j) { @@ -604,11 +616,11 @@ void SplineC2C::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -625,13 +637,14 @@ void SplineC2C::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r); dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r); dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r); dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r); - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -670,8 +683,10 @@ void SplineC2C::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r); grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r); - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
+ // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -715,7 +730,8 @@ void SplineC2C::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -771,7 +787,6 @@ void SplineC2C::assign_vghgh(const PointType& r, grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r); grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r); - grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r); grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r); grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r); @@ -784,20 +799,21 @@ void SplineC2C::assign_vghgh(const PointType& r, } } -template -void SplineC2C::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void SplineC2CT::evaluateVGHGH(const ParticleSetT& P, + const 
int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); #pragma omp parallel { int first, last; - // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type + // Factor of 2 because psi is complex and the spline storage and + // evaluation uses a real type FairDivideAligned(2 * psi.size(), getAlignment(), omp_get_num_threads(), omp_get_thread_num(), first, last); spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last); @@ -805,7 +821,9 @@ void SplineC2C::evaluateVGHGH(const ParticleSet& P, } } -template class SplineC2C; -template class SplineC2C; +template class SplineC2CT>; +template class SplineC2CT>; +template class SplineC2CT>; +template class SplineC2CT>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h new file mode 100644 index 0000000000..d5e862d063 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h @@ -0,0 +1,235 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// + +/** @file + * + * class to handle complex splines to complex orbitals with splines of arbitrary + * precision + */ +#ifndef QMCPLUSPLUS_SPLINE_C2CT_H +#define QMCPLUSPLUS_SPLINE_C2CT_H + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include + +namespace qmcplusplus +{ +/** class to match std::complex spline with BsplineSet::ValueType (complex) + * SPOs + * @tparam ST precision of spline + * + * Requires temporage storage and multiplication of phase vectors + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. All the output orbitals are complex. + */ +template +class SplineC2CT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + + // types for evaluation results + using ComplexT = typename BsplineSetT::ValueType; + using typename BsplineSetT::IndexType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::RealType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + +private: + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; + + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; + + vContainer_type mKK; + VectorSoaContainer myKcart; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; + +protected: + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineC2CT(const std::string& my_name) : BsplineSetT(my_name) {} + + SplineC2CT(const SplineC2CT& in); + virtual std::string getClassName() const override { return "SplineC2C"; } + virtual std::string getKeyword() const override { return "SplineC2C"; } + bool isComplex() const override { return true; }; + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + bool isRotationSupported() const override { return true; } + + /// Store an original copy of the spline coefficients for orbital rotation + void storeParamsBeforeRotation() override; + + /* + Implements orbital rotations via [1,2]. + Should be called by RotatedSPOs::apply_rotation() + This implementation requires that NSPOs > Nelec. In other words, + if you want to run a orbopt wfn, you must include some virtual orbitals! + Some results (using older Berkeley branch) were published in [3]. 
+ [1] Filippi & Fahy, JCP 112, (2000) + [2] Toulouse & Umrigar, JCP 126, (2007) + [3] Townsend et al., PRB 102, (2020) + */ + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] *= 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + const size_t nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) + { + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; + } + } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + void evaluateValue(const 
ParticleSetT& P, const int iat, ValueVector& psi) override; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + /** assign_vgl + */ + void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) + const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h index 05b8c4a0b3..c98857e50f 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2R.h @@ -21,197 +21,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_H #define QMCPLUSPLUS_SPLINE_C2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include 
"QMCWaveFunctions/BsplineFactory/SplineC2RT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. - */ -template -class SplineC2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. 
Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr> SplineInst; - - vContainer_type mKK; - VectorSoaContainer myKcart; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2R(const std::string& my_name) : BsplineSet(my_name), nComplexBands(0) {} - - SplineC2R(const SplineC2R& in); - virtual std::string getClassName() const override { return "SplineC2R"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } 
- - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK.resize(nk); - myKcart.resize(nk); - for (size_t i = 0; i < nk; ++i) - { - mKK[i] = -dot(kPoints[i], kPoints[i]); - myKcart(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - /** assign_vgl - */ - void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) - const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct 
SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2R; -extern template class SplineC2R; +template +using SplineC2R = SplineC2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h index e41741e33c..25362d5f2f 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.h @@ -18,305 +18,12 @@ #ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H #define QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "Utilities/FairDivide.h" -#include "Utilities/TimerManager.h" -#include -#include "SplineOMPTargetMultiWalkerMem.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h" namespace qmcplusplus { -/** class to match std::complex spline with BsplineSet::ValueType (real) SPOs with OpenMP offload - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of phase vectors - * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. - * The first nComplexBands complex splines produce 2 real orbitals. - * The rest complex splines produce 1 real orbital. - * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize. 
- */ -template -class SplineC2ROMPTarget : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - - template - using OffloadVector = Vector>; - template - using OffloadPosVector = VectorSoaContainer>; - -private: - /// timer for offload portion - NewTimer& offload_timer_; - ///primitive cell - CrystalLattice PrimLattice; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///number of complex bands - int nComplexBands; - ///multi bspline set - std::shared_ptr, OffloadAllocator>> SplineInst; - - std::shared_ptr> mKK; - std::shared_ptr> myKcart; - std::shared_ptr> GGt_offload; - std::shared_ptr> PrimLattice_G_offload; - - ResourceHandle> mw_mem_handle_; - - ///team private ratios for reduction, numVP x numTeams - Matrix> ratios_private; - ///offload scratch space, dynamically resized to the maximal need - Vector> offload_scratch; - ///result scratch space, dynamically resized to the maximal need - Vector> results_scratch; - ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer - Vector> psiinv_pos_copy; - ///position scratch space, used to avoid allocation on the fly and faster transfer - Vector> multi_pos_copy; - - void evaluateVGLMultiPos(const Vector>& multi_pos_copy, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& 
d2psi_v_list) const; - -protected: - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineC2ROMPTarget(const std::string& my_name) - : BsplineSet(my_name), - offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), - nComplexBands(0), - GGt_offload(std::make_shared>(9)), - PrimLattice_G_offload(std::make_shared>(9)) - {} - - SplineC2ROMPTarget(const SplineC2ROMPTarget& in); - - virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; } - virtual std::string getKeyword() const override { return "SplineC2R"; } - bool isComplex() const override { return true; }; - virtual bool isOMPoffload() const override { return true; } - - void createResource(ResourceCollection& collection) const override - { - auto resource_index = collection.addResource(std::make_unique>()); - } - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - phi_leader.mw_mem_handle_ = collection.lendResource>(); - } - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override - { - assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); - collection.takebackResource(phi_leader.mw_mem_handle_); - } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - size_t npad = getAlignedSize(2 * n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int 
Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - - for (size_t ib = 0; ib < offset.size(); ib++) - offset[ib] = offset[ib] * 2; - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - resize_kpoints(); - SplineInst = std::make_shared, OffloadAllocator>>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - /// this routine can not be called from threaded region - void finalizeConstruction() override - { - // map the SplineInst->getSplinePtr() structure to GPU - auto* MultiSpline = SplineInst->getSplinePtr(); - auto* restrict coefs = MultiSpline->coefs; - // attach pointers on the device to achieve deep copy - PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") - { - MultiSpline->coefs = coefs; - } - - // transfer static data to GPU - auto* mKK_ptr = mKK->data(); - PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") - auto* myKcart_ptr = myKcart->data(); - PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") - for (uint32_t i = 0; i < 9; i++) - { - (*GGt_offload)[i] = GGt[i]; - (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; - } - auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); - PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") - auto* GGt_ptr = GGt_offload->data(); - PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - /** remap kPoints to pack the double copy */ - inline void resize_kpoints() - { - nComplexBands = this->remap_kpoints(); - const int nk = kPoints.size(); - mKK = std::make_shared>(nk); - myKcart = std::make_shared>(nk); - for (size_t i = 0; i < nk; 
++i) - { - (*mKK)[i] = -dot(kPoints[i], kPoints[i]); - (*myKcart)(i) = kPoints[i]; - } - } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - virtual void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - virtual void mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const; - - virtual void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void 
assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - virtual void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineC2ROMPTarget; -extern template class SplineC2ROMPTarget; +template +using SplineC2ROMPTarget = SplineC2ROMPTargetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp similarity index 88% rename from src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp index 18c2a913c2..573dc117ef 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTarget.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp @@ -9,64 +9,64 @@ // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2ROMPTargetT.h" -#include "SplineC2ROMPTarget.h" -#include "spline2/MultiBsplineEval.hpp" -#include "spline2/MultiBsplineEval_OMPoffload.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" #include "ApplyPhaseC2R.hpp" #include "Concurrency/OpenMP.h" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" +#include "spline2/MultiBsplineEval_OMPoffload.hpp" namespace qmcplusplus { -template -SplineC2ROMPTarget::SplineC2ROMPTarget(const SplineC2ROMPTarget& in) = default; - 
-template -inline void SplineC2ROMPTarget::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2ROMPTargetT::SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in) = default; + +template +inline void SplineC2ROMPTargetT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { SplineInst->copy_spline(spline_r, 2 * ispline); SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2ROMPTarget::read_splines(hdf_archive& h5f) +template +bool SplineC2ROMPTargetT::read_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2ROMPTarget::write_splines(hdf_archive& h5f) +template +bool SplineC2ROMPTargetT::write_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void SplineC2ROMPTarget::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void SplineC2ROMPTargetT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; const ST x = r[0], y = r[1], z = r[2]; const ST* restrict kx = myKcart->data(0); const ST* restrict ky = myKcart->data(1); const ST* restrict kz = myKcart->data(2); - TT* restrict psi_s = psi.data() + first_spo; + TT* restrict psi_s = psi.data() + this->first_spo; #pragma omp simd for (size_t j = first; j < std::min(nComplexBands, last); j++) { @@ -92,8 +92,8 @@ inline void SplineC2ROMPTarget::assign_v(const PointType& r, } } -template -void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void SplineC2ROMPTargetT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -115,7 +115,7 @@ void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); offload_scratch.resize(spline_padded_size); results_scratch.resize(sposet_padded_size); @@ -128,7 +128,7 @@ void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, const auto rux = ru[0], ruy = ru[1], ruz = ru[2]; const auto myKcart_padded_size = myKcart->capacity(); auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; const auto requested_orb_size = psi.size(); @@ -163,11 +163,11 @@ void SplineC2ROMPTarget::evaluateValue(const ParticleSet& P, const int iat, } } -template -void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void SplineC2ROMPTargetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const 
ValueVector& psiinv, + std::vector& ratios) { const int nVP = VP.getTotalNum(); psiinv_pos_copy.resize(psiinv.size() + nVP * 6); @@ -193,7 +193,7 @@ void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; ratios_private.resize(nVP, NumTeams); const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); offload_scratch.resize(spline_padded_size * nVP); results_scratch.resize(sposet_padded_size * nVP); @@ -205,7 +205,7 @@ void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, auto* myKcart_ptr = myKcart->data(); auto* psiinv_ptr = psiinv_pos_copy.data(); auto* ratios_private_ptr = ratios_private.data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; const auto requested_orb_size = psiinv.size(); @@ -261,15 +261,16 @@ void SplineC2ROMPTarget::evaluateDetRatios(const VirtualParticleSet& VP, } } -template -void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void SplineC2ROMPTargetT::mw_evaluateDetRatios( + const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D; auto& mw_ratios_private = mw_mem.mw_ratios_private; @@ -279,7 +280,7 @@ void SplineC2ROMPTarget::mw_evaluateDetRatios(const 
RefVectorWithLeader& VP : vp_list) mw_nVP += VP.getTotalNum(); const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int)); @@ -297,7 +298,7 @@ void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader& VP = vp_list[iw]; assert(ratios_list[iw].size() == VP.getTotalNum()); for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) { @@ -318,7 +319,7 @@ void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); mw_offload_scratch.resize(spline_padded_size * mw_nVP); mw_results_scratch.resize(sposet_padded_size * mw_nVP); @@ -330,7 +331,7 @@ void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeaderdata(); auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data(); auto* ratios_private_ptr = mw_ratios_private.data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; { @@ -392,13 +393,14 @@ void SplineC2ROMPTarget::mw_evaluateDetRatios(const RefVectorWithLeader -inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void SplineC2ROMPTargetT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { constexpr ST two(2); const ST x = r[0], y = r[1], z = r[2]; @@ -417,7 +419,7 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, const ST* restrict g2 = myG.data(2); ASSUME_ALIGNED(g2); - const size_t N = kPoints.size(); + const size_t N = this->kPoints.size(); #pragma omp simd for (size_t j = 0; j < nComplexBands; j++) @@ -431,11 +433,11 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // 
phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = g1[jr]; const ST dZ_r = g2[jr]; @@ -455,7 +457,7 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; psi[psiIndex + 1] = c * val_i + s * val_r; d2psi[psiIndex] = c * lap_r - s * lap_i; @@ -480,11 +482,11 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = g1[jr]; const ST dZ_r = g2[jr]; @@ -500,7 +502,7 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, const ST gX_i = dX_i - val_r * kX; const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; @@ -512,12 +514,12 @@ inline void SplineC2ROMPTarget::assign_vgl_from_l(const PointType& r, } } -template -void SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SplineC2ROMPTargetT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -526,7 +528,7 @@ void 
SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); // for V(1)G(3)H(6) intermediate result offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS); // for V(1)G(3)L(1) final result @@ -543,7 +545,7 @@ void SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, auto* GGt_ptr = GGt_offload->data(); auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; const auto requested_orb_size = psi.size(); @@ -600,19 +602,19 @@ void SplineC2ROMPTarget::evaluateVGL(const ParticleSet& P, } } -template -void SplineC2ROMPTarget::evaluateVGLMultiPos(const Vector>& multi_pos, - Vector>& offload_scratch, - Vector>& results_scratch, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2ROMPTargetT::evaluateVGLMultiPos(const Vector>& multi_pos, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { const size_t num_pos = psi_v_list.size(); const size_t ChunkSizePerTeam = 512; const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); // for V(1)G(3)H(6) intermediate result offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS); // for V(1)G(3)L(1) final result @@ -628,7 +630,7 @@ void SplineC2ROMPTarget::evaluateVGLMultiPos(const Vectordata(); 
auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); auto* myKcart_ptr = myKcart->data(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const size_t nComplexBands_local = nComplexBands; const auto requested_orb_size = psi_v_list[0].get().size(); @@ -698,16 +700,16 @@ void SplineC2ROMPTarget::evaluateVGLMultiPos(const Vector -void SplineC2ROMPTarget::mw_evaluateVGL(const RefVectorWithLeader& sa_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void SplineC2ROMPTargetT::mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { assert(this == &sa_list.getLeader()); - auto& phi_leader = sa_list.getCastedLeader>(); + auto& phi_leader = sa_list.template getCastedLeader(); auto& mw_mem = phi_leader.mw_mem_handle_.getResource(); auto& mw_pos_copy = mw_mem.mw_pos_copy; auto& mw_offload_scratch = mw_mem.mw_offload_scratch; @@ -732,17 +734,17 @@ void SplineC2ROMPTarget::mw_evaluateVGL(const RefVectorWithLeader& s d2psi_v_list); } -template -void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const +template +void SplineC2ROMPTargetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { assert(this == &spo_list.getLeader()); - auto& phi_leader = spo_list.getCastedLeader>(); + auto& phi_leader = spo_list.template getCastedLeader(); auto& mw_mem = 
phi_leader.mw_mem_handle_.getResource(); auto& buffer_H2D = mw_mem.buffer_H2D; auto& rg_private = mw_mem.rg_private; @@ -771,7 +773,7 @@ void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL const size_t num_pos = nwalkers; const auto spline_padded_size = myV.size(); - const auto sposet_padded_size = getAlignedSize(OrbitalSetSize); + const auto sposet_padded_size = getAlignedSize(this->OrbitalSetSize); const size_t ChunkSizePerTeam = 512; const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; @@ -794,7 +796,7 @@ void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL auto* phi_vgl_ptr = phi_vgl_v.data(); auto* rg_private_ptr = rg_private.data(); const size_t buffer_H2D_stride = buffer_H2D.cols(); - const size_t first_spo_local = first_spo; + const size_t first_spo_local = this->first_spo; const auto requested_orb_size = phi_vgl_v.size(2); const size_t phi_vgl_stride = num_pos * requested_orb_size; const size_t nComplexBands_local = nComplexBands; @@ -865,8 +867,9 @@ void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL const size_t last_real = omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size); ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0); - PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)") - for (int j = first_real; j < last_real; j++) + PRAGMA_OFFLOAD("omp parallel for \ + reduction(+: ratio, grad_x, grad_y, grad_z)") + for (size_t j = first_real; j < last_real; j++) { out_phi[j] = psi[j]; out_dphi_x[j] = dpsi_x[j]; @@ -905,16 +908,16 @@ void SplineC2ROMPTarget::mw_evaluateVGLandDetRatioGrads(const RefVectorWithL } } -template -void SplineC2ROMPTarget::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void SplineC2ROMPTargetT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + 
HessVector& grad_grad_psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -947,11 +950,11 @@ void SplineC2ROMPTarget::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -968,7 +971,7 @@ void SplineC2ROMPTarget::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; @@ -1051,11 +1054,11 @@ void SplineC2ROMPTarget::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -1072,7 +1075,7 @@ void SplineC2ROMPTarget::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; @@ -1129,12 
+1132,12 @@ void SplineC2ROMPTarget::assign_vgh(const PointType& r, } } -template -void SplineC2ROMPTarget::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineC2ROMPTargetT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1148,17 +1151,17 @@ void SplineC2ROMPTarget::evaluateVGH(const ParticleSet& P, } } -template -void SplineC2ROMPTarget::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void SplineC2ROMPTargetT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -1190,7 +1193,7 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST* restrict gh122 = mygH.data(8); const ST* restrict gh222 = mygH.data(9); -//SIMD doesn't work quite right yet. Comment out until further debugging. +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd for (size_t j = first; j < std::min(nComplexBands, last); j++) { @@ -1203,11 +1206,11 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -1224,7 +1227,7 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; @@ -1235,7 +1238,8 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -1284,8 +1288,10 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
+ // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -1329,7 +1335,8 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -1437,11 +1444,11 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -1458,13 +1465,14 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - //intermediates 
for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -1503,8 +1511,10 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -1548,7 +1558,8 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -1616,13 +1627,13 @@ void SplineC2ROMPTarget::assign_vghgh(const PointType& r, } } -template -void SplineC2ROMPTarget::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void SplineC2ROMPTargetT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1636,13 +1647,13 @@ void SplineC2ROMPTarget::evaluateVGHGH(const ParticleSet& P, } } -template -void SplineC2ROMPTarget::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SplineC2ROMPTargetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { // chunk the [first, last) loop into blocks to save temporary memory usage const int block_size = 16; @@ -1685,9 
+1696,9 @@ void SplineC2ROMPTarget::evaluate_notranspose(const ParticleSet& P, multi_pos_copy[ipos * 6 + 4] = ru[1]; multi_pos_copy[ipos * 6 + 5] = ru[2]; - multi_psi_v.emplace_back(logdet[i + ipos], OrbitalSetSize); - multi_dpsi_v.emplace_back(dlogdet[i + ipos], OrbitalSetSize); - multi_d2psi_v.emplace_back(d2logdet[i + ipos], OrbitalSetSize); + multi_psi_v.emplace_back(logdet[i + ipos], this->OrbitalSetSize); + multi_dpsi_v.emplace_back(dlogdet[i + ipos], this->OrbitalSetSize); + multi_d2psi_v.emplace_back(d2logdet[i + ipos], this->OrbitalSetSize); psi_v_list.push_back(multi_psi_v[ipos]); dpsi_v_list.push_back(multi_dpsi_v[ipos]); @@ -1698,7 +1709,9 @@ void SplineC2ROMPTarget::evaluate_notranspose(const ParticleSet& P, } } -template class SplineC2ROMPTarget; -template class SplineC2ROMPTarget; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; +template class SplineC2ROMPTargetT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h new file mode 100644 index 0000000000..6f77759be0 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h @@ -0,0 +1,334 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file SplineC2ROMPTarget.h + * + * class to handle complex splines to real orbitals with splines of arbitrary + * precision splines storage and computation is offloaded to accelerators using + * OpenMP target + */ +#ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGETT_H +#define QMCPLUSPLUS_SPLINE_C2R_OMPTARGETT_H + +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "SplineOMPTargetMultiWalkerMem.h" +#include "Utilities/FairDivide.h" +#include "Utilities/TimerManager.h" +#include "spline2/MultiBspline.hpp" +#include + +#include + +namespace qmcplusplus +{ +/** class to match std::complex spline with BsplineSet::ValueType (real) + * SPOs with OpenMP offload + * @tparam ST precision of spline + * + * Requires temporage storage and multiplication of phase vectors + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. The first nComplexBands complex + * splines produce 2 real orbitals. The rest complex splines produce 1 real + * orbital. All the output orbitals are real (C2R). The maximal number of output + * orbitals is OrbitalSetSize. 
+ */ +template +class SplineC2ROMPTargetT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::ValueType; + using typename BsplineSetT::GradType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::GradMatrix; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + using typename BsplineSetT::ValueMatrix; + using typename BsplineSetT::OffloadMWVGLArray; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + + template + using OffloadVector = Vector>; + template + using OffloadPosVector = VectorSoaContainer>; + +private: + /// timer for offload portion + NewTimer& offload_timer_; + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// number of complex bands + int nComplexBands; + /// multi bspline set + std::shared_ptr, OffloadAllocator>> SplineInst; + + std::shared_ptr> mKK; + std::shared_ptr> myKcart; + std::shared_ptr> GGt_offload; + std::shared_ptr> PrimLattice_G_offload; + + ResourceHandle> mw_mem_handle_; + + /// team private ratios for reduction, numVP x numTeams + Matrix> ratios_private; + /// offload scratch space, dynamically resized to the maximal need + Vector> offload_scratch; + /// result scratch space, dynamically resized to the maximal need + Vector> results_scratch; + /// psiinv and position scratch space, used to avoid allocation on the fly + /// and faster transfer + Vector> psiinv_pos_copy; + /// position scratch space, used to avoid allocation on the fly and faster + /// transfer + Vector> multi_pos_copy; + + void evaluateVGLMultiPos(const Vector>& multi_pos_copy, + Vector>& offload_scratch, + Vector>& results_scratch, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; + +protected: + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineC2ROMPTargetT(const std::string& my_name) + : BsplineSetT(my_name), + offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)), + nComplexBands(0), + GGt_offload(std::make_shared>(9)), + PrimLattice_G_offload(std::make_shared>(9)) + {} + + SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in); + + virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; } + virtual std::string getKeyword() const override { return "SplineC2R"; } + bool isComplex() const override { return true; }; + virtual bool isOMPoffload() const override { return true; } + + void createResource(ResourceCollection& collection) const override + { + auto resource_index = collection.addResource(std::make_unique>()); + } + + void 
acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + phi_leader.mw_mem_handle_ = collection.lendResource>(); + } + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override + { + assert(this == &spo_list.getLeader()); + auto& phi_leader = spo_list.template getCastedLeader(); + collection.takebackResource(phi_leader.mw_mem_handle_); + } + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] = this->offset[ib] * 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared, OffloadAllocator>>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + /// this routine can not be called from threaded region + void finalizeConstruction() override + { + // map the SplineInst->getSplinePtr() structure to GPU + auto* MultiSpline = SplineInst->getSplinePtr(); + auto* restrict coefs = 
MultiSpline->coefs; + // attach pointers on the device to achieve deep copy + PRAGMA_OFFLOAD("omp target \ + map(always, to: MultiSpline[0:1], \ + coefs[0:MultiSpline->coefs_size])") + { + MultiSpline->coefs = coefs; + } + + // transfer static data to GPU + auto* mKK_ptr = mKK->data(); + PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") + auto* myKcart_ptr = myKcart->data(); + PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") + for (size_t i = 0; i < 9; i++) + { + (*GGt_offload)[i] = GGt[i]; + (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; + } + auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); + PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") + auto* GGt_ptr = GGt_offload->data(); + PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + nComplexBands = this->remap_kpoints(); + const int nk = this->kPoints.size(); + mKK = std::make_shared>(nk); + myKcart = std::make_shared>(nk); + for (size_t i = 0; i < nk; ++i) + { + (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]); + (*myKcart)(i) = this->kPoints[i]; + } + } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + virtual void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; + + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& 
ratios_list) const override; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + virtual void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + virtual void mw_evaluateVGL(const RefVectorWithLeader>& sa_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + virtual void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + virtual void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp similarity index 89% rename from 
src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp index 8b6504888d..f8b86a08cf 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineC2R.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp @@ -13,64 +13,64 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineC2RT.h" -#include "Concurrency/OpenMP.h" -#include "SplineC2R.h" -#include "spline2/MultiBsplineEval.hpp" -#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" #include "CPU/math.hpp" #include "CPU/SIMD/inner_product.hpp" +#include "Concurrency/OpenMP.h" +#include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" +#include "spline2/MultiBsplineEval.hpp" namespace qmcplusplus { -template -SplineC2R::SplineC2R(const SplineC2R& in) = default; - -template -inline void SplineC2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineC2RT::SplineC2RT(const SplineC2RT& in) = default; + +template +inline void SplineC2RT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { SplineInst->copy_spline(spline_r, 2 * ispline); SplineInst->copy_spline(spline_i, 2 * ispline + 1); } -template -bool SplineC2R::read_splines(hdf_archive& h5f) +template +bool SplineC2RT::read_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineC2R::write_splines(hdf_archive& h5f) +template +bool SplineC2RT::write_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine 
bigtable(SplineInst->getSplinePtr()); return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -inline void SplineC2R::assign_v(const PointType& r, - const vContainer_type& myV, - ValueVector& psi, - int first, - int last) const +template +inline void SplineC2RT::assign_v(const PointType& r, + const vContainer_type& myV, + ValueVector& psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST x = r[0], y = r[1], z = r[2]; const ST* restrict kx = myKcart.data(0); const ST* restrict ky = myKcart.data(1); const ST* restrict kz = myKcart.data(2); - TT* restrict psi_s = psi.data() + first_spo; + TT* restrict psi_s = psi.data() + this->first_spo; const size_t requested_orb_size = psi.size(); #pragma omp simd for (size_t j = first; j < std::min(nComplexBands, last); j++) @@ -100,8 +100,8 @@ inline void SplineC2R::assign_v(const PointType& r, } } -template -void SplineC2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void SplineC2RT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -116,11 +116,11 @@ void SplineC2R::evaluateValue(const ParticleSet& P, const int iat, ValueVect } } -template -void SplineC2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void SplineC2RT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { const bool need_resize = ratios_private.rows() < VP.getTotalNum(); @@ -130,14 +130,15 @@ void SplineC2R::evaluateDetRatios(const VirtualParticleSet& VP, // initialize thread private ratios if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime + if (tid == 0) // just like 
#pragma omp master, but one fewer call to + // the runtime ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier } int first, last; FairDivideAligned(myV.size(), getAlignment(), omp_get_num_threads(), tid, first, last); const int first_cplx = first / 2; - const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2; + const int last_cplx = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2; for (int iat = 0; iat < VP.getTotalNum(); ++iat) { @@ -163,17 +164,17 @@ void SplineC2R::evaluateDetRatios(const VirtualParticleSet& VP, } /** assign_vgl - */ -template -inline void SplineC2R::assign_vgl(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const + */ +template +inline void SplineC2RT::assign_vgl(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; constexpr ST two(2); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), @@ -221,11 +222,11 @@ inline void SplineC2R::assign_vgl(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -247,7 +248,7 @@ inline void SplineC2R::assign_vgl(const PointType& r, const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; if (psiIndex < requested_orb_size) { psi[psiIndex] = c * val_r - s * val_i; @@ -278,11 +279,11 @@ inline void SplineC2R::assign_vgl(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -299,7 +300,7 @@ inline void SplineC2R::assign_vgl(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size) + if (const size_t psiIndex = this->first_spo + nComplexBands + j; psiIndex < requested_orb_size) { psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; @@ -315,10 +316,14 @@ inline void SplineC2R::assign_vgl(const PointType& r, } } -/** 
assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void SplineC2RT::assign_vgl_from_l(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { constexpr ST two(2); const ST x = r[0], y = r[1], z = r[2]; @@ -337,7 +342,7 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST* restrict g2 = myG.data(2); ASSUME_ALIGNED(g2); - const size_t N = kPoints.size(); + const size_t N = this->kPoints.size(); #pragma omp simd for (size_t j = 0; j < nComplexBands; j++) @@ -351,11 +356,11 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = g1[jr]; const ST dZ_r = g2[jr]; @@ -375,7 +380,7 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i); const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r); - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; psi[psiIndex + 1] = c * val_i + s * val_r; d2psi[psiIndex] = c * lap_r - s * lap_i; @@ -400,11 +405,11 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g0[jr]; const ST dY_r = 
g1[jr]; const ST dZ_r = g2[jr]; @@ -420,7 +425,7 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps const ST gX_i = dX_i - val_r * kX; const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; @@ -432,12 +437,12 @@ inline void SplineC2R::assign_vgl_from_l(const PointType& r, ValueVector& ps } } -template -void SplineC2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SplineC2RT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -452,16 +457,16 @@ void SplineC2R::evaluateVGL(const ParticleSet& P, } } -template -void SplineC2R::assign_vgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void SplineC2RT::assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? 
this->kPoints.size() : last; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -494,11 +499,11 @@ void SplineC2R::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -515,7 +520,7 @@ void SplineC2R::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; @@ -598,11 +603,11 @@ void SplineC2R::assign_vgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -619,7 +624,7 @@ void SplineC2R::assign_vgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; @@ -676,12 +681,12 @@ void SplineC2R::assign_vgh(const PointType& r, } } -template -void SplineC2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) 
+template +void SplineC2RT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -695,17 +700,17 @@ void SplineC2R::evaluateVGH(const ParticleSet& P, } } -template -void SplineC2R::assign_vghgh(const PointType& r, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void SplineC2RT::assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7), @@ -737,7 +742,7 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST* restrict gh122 = mygH.data(8); const ST* restrict gh222 = mygH.data(9); -//SIMD doesn't work quite right yet. Comment out until further debugging. +// SIMD doesn't work quite right yet. Comment out until further debugging. 
#pragma omp simd for (size_t j = first; j < std::min(nComplexBands, last); j++) { @@ -750,11 +755,11 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -771,7 +776,7 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + jr; + const size_t psiIndex = this->first_spo + jr; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; @@ -782,7 +787,8 @@ void SplineC2R::assign_vghgh(const PointType& r, dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r; dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r; - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -831,8 +837,10 @@ void SplineC2R::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r; grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. 
+ // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -876,7 +884,8 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -984,11 +993,11 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST val_r = myV[jr]; const ST val_i = myV[ji]; - //phase + // phase ST s, c; qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c); - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr]; const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr]; const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr]; @@ -1005,13 +1014,14 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST gY_i = dY_i - val_r * kY; const ST gZ_i = dZ_i - val_r * kZ; - const size_t psiIndex = first_spo + nComplexBands + j; + const size_t psiIndex = this->first_spo + nComplexBands + j; psi[psiIndex] = c * val_r - s * val_i; dpsi[psiIndex][0] = c * gX_r - s * gX_i; dpsi[psiIndex][1] = c * gY_r - s * gY_i; dpsi[psiIndex][2] = c * gZ_r - s * gZ_i; - //intermediates for computation of hessian. 
\partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22); @@ -1050,8 +1060,10 @@ void SplineC2R::assign_vghgh(const PointType& r, grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i; grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -1095,7 +1107,8 @@ void SplineC2R::assign_vghgh(const PointType& r, const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i; const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r; const ST gh_xxy_r = @@ -1163,13 +1176,13 @@ void SplineC2R::assign_vghgh(const PointType& r, } } -template -void SplineC2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void SplineC2RT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru(PrimLattice.toUnit_floor(r)); @@ -1183,7 +1196,9 @@ void SplineC2R::evaluateVGHGH(const ParticleSet& P, } } -template class SplineC2R; -template class SplineC2R; +template class SplineC2RT; +template class SplineC2RT; +template class SplineC2RT; +template class SplineC2RT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h new file mode 100644 index 0000000000..baea7b7d0a --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h @@ -0,0 +1,221 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Anouar Benali, benali@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file + * + * class to handle complex splines to real orbitals with splines of arbitrary + * precision + */ +#ifndef QMCPLUSPLUS_SPLINE_C2RT_H +#define QMCPLUSPLUS_SPLINE_C2RT_H + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include + +namespace qmcplusplus +{ +/** class to match std::complex spline with BsplineSet::ValueType (real) + * SPOs + * @tparam ST precision of spline + * + * Requires temporary storage and multiplication of phase vectors. + * The internal storage of complex spline coefficients uses double sized real + * arrays of ST type, aligned and padded. The first nComplexBands complex + * splines produce 2 real orbitals. The remaining complex splines produce 1 real + * orbital. All the output orbitals are real (C2R). The maximal number of output + * orbitals is OrbitalSetSize. 
+ */ +template +class SplineC2RT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using typename BsplineSetT::GGGVector; + using typename BsplineSetT::GradVector; + using typename BsplineSetT::HessVector; + using typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + +private: + /// primitive cell + CrystalLattice PrimLattice; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + ///CartesianUnit, e.g. Hessian + Tensor GGt; + /// number of complex bands + int nComplexBands; + /// multi bspline set + std::shared_ptr> SplineInst; + + vContainer_type mKK; + VectorSoaContainer myKcart; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; + +protected: + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineC2RT(const std::string& my_name) : BsplineSetT(my_name), nComplexBands(0) {} + + SplineC2RT(const SplineC2RT& in); + virtual std::string getClassName() const override { return "SplineC2R"; } + virtual std::string getKeyword() const override { return "SplineC2R"; } + bool isComplex() const override { return true; }; + + std::unique_ptr> makeClone() const override { return std::make_unique(*this); } + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + size_t npad = getAlignedSize(2 * n); + myV.resize(npad); + myG.resize(npad); + myL.resize(npad); + myH.resize(npad); + mygH.resize(npad); + } + + void bcast_tables(Communicate* 
comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + + for (size_t ib = 0; ib < this->offset.size(); ib++) + this->offset[ib] = this->offset[ib] * 2; + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + resize_kpoints(); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + /** remap kPoints to pack the double copy */ + inline void resize_kpoints() + { + nComplexBands = this->remap_kpoints(); + const int nk = this->kPoints.size(); + mKK.resize(nk); + myKcart.resize(nk); + for (size_t i = 0; i < nk; ++i) + { + mKK[i] = -dot(this->kPoints[i], this->kPoints[i]); + myKcart(i) = this->kPoints[i]; + } + } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + /** assign_vgl + */ + void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) + const; + + /** assign_vgl_from_l can be used when myL is precomputed and 
myV,myG,myL in + * cartesian + */ + void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(const PointType& r, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h index 3de6fc33fc..ea51f936f2 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h @@ -16,208 +16,12 @@ #ifndef QMCPLUSPLUS_SPLINE_R2R_H #define QMCPLUSPLUS_SPLINE_R2R_H -#include -#include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" -#include "OhmmsSoA/VectorSoaContainer.h" -#include "spline2/MultiBspline.hpp" -#include "Utilities/FairDivide.h" +#include "QMCWaveFunctions/BsplineFactory/SplineR2RT.h" namespace qmcplusplus { -/** class to match ST real spline with BsplineSet::ValueType (real) SPOs - * @tparam ST precision of spline - * - * Requires temporage storage and multiplication of the sign of the real part of the phase - * Internal storage ST type arrays are aligned and padded. 
- */ -template -class SplineR2R : public BsplineSet -{ -public: - using SplineType = typename bspline_traits::SplineType; - using BCType = typename bspline_traits::BCType; - using DataType = ST; - using PointType = TinyVector; - using SingleSplineType = UBspline_3d_d; - // types for evaluation results - using TT = typename BsplineSet::ValueType; - using BsplineSet::GGGVector; - using BsplineSet::GradVector; - using BsplineSet::HessVector; - using BsplineSet::ValueVector; - - using vContainer_type = Vector>; - using gContainer_type = VectorSoaContainer; - using hContainer_type = VectorSoaContainer; - using ghContainer_type = VectorSoaContainer; - -private: - bool IsGamma; - ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian - Tensor GGt; - ///multi bspline set - std::shared_ptr> SplineInst; - - ///Copy of original splines for orbital rotation - std::shared_ptr> coef_copy_; - - ///thread private ratios for reduction when using nested threading, numVP x numThread - Matrix ratios_private; - - -protected: - ///primitive cell - CrystalLattice PrimLattice; - /// intermediate result vectors - vContainer_type myV; - vContainer_type myL; - gContainer_type myG; - hContainer_type myH; - ghContainer_type mygH; - -public: - SplineR2R(const std::string& my_name) : BsplineSet(my_name) {} - - SplineR2R(const SplineR2R& in); - virtual std::string getClassName() const override { return "SplineR2R"; } - virtual std::string getKeyword() const override { return "SplineR2R"; } - bool isComplex() const override { return false; }; - bool isRotationSupported() const override { return true; } - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - /// Store an original copy of the spline coefficients for orbital rotation - void storeParamsBeforeRotation() override; - - /* - Implements orbital rotations via [1,2]. - Should be called by RotatedSPOs::apply_rotation() - - This implementation requires that NSPOs > Nelec. 
In other words, - if you want to run a orbopt wfn, you must include some virtual orbitals! - - Some results (using older Berkeley branch) were published in [3]. - - [1] Filippi & Fahy, JCP 112, (2000) - [2] Toulouse & Umrigar, JCP 126, (2007) - [3] Townsend et al., PRB 102, (2020) - */ - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; - - inline void resizeStorage(size_t n, size_t nvals) - { - init_base(n); - const size_t npad = getAlignedSize(n); - myV.resize(npad); - myG.resize(npad); - myL.resize(npad); - myH.resize(npad); - mygH.resize(npad); - - IsGamma = ((HalfG[0] == 0) && (HalfG[1] == 0) && (HalfG[2] == 0)); - } - - void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } - - void gather_tables(Communicate* comm) - { - if (comm->size() == 1) - return; - const int Nbands = kPoints.size(); - const int Nbandgroups = comm->size(); - offset.resize(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, offset); - gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); - } - - template - void create_spline(GT& xyz_g, BCT& xyz_bc) - { - GGt = dot(transpose(PrimLattice.G), PrimLattice.G); - SplineInst = std::make_shared>(); - SplineInst->create(xyz_g, xyz_bc, myV.size()); - - app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " - << "for the coefficients in 3D spline orbital representation" << std::endl; - } - - inline void flush_zero() { SplineInst->flush_zero(); } - - void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); - - bool read_splines(hdf_archive& h5f); - - bool write_splines(hdf_archive& h5f); - - /** convert position in PrimLattice unit and return sign */ - inline int convertPos(const PointType& r, PointType& ru) - { - ru = PrimLattice.toUnit(r); - int bc_sign = 0; - for (int i = 0; i < D; i++) - if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) - ru[i] = ST(0.0); - 
else - { - ST img = std::floor(ru[i]); - ru[i] -= img; - bc_sign += HalfG[i] * (int)img; - } - return bc_sign; - } - - void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; - - void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override; - - void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; - - /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ - void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - void evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) override; - - void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) - const; - - void evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override; - - void assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first = 0, - int last = -1) const; - - void evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override; - - template - friend struct SplineSetReader; - friend struct BsplineReaderBase; -}; - -extern template class SplineR2R; -extern template class SplineR2R; +template +using SplineR2R = SplineR2RT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp similarity index 71% rename from src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp rename to src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp index 
9498f54f12..32c99a0652 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp @@ -12,49 +12,49 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "SplineR2RT.h" +#include "CPU/BLAS.hpp" +#include "CPU/SIMD/inner_product.hpp" #include "Concurrency/OpenMP.h" -#include "SplineR2R.h" -#include "spline2/MultiBsplineEval.hpp" #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp" -#include "Platforms/CPU/BLAS.hpp" -#include "CPU/SIMD/inner_product.hpp" +#include "spline2/MultiBsplineEval.hpp" namespace qmcplusplus { -template -SplineR2R::SplineR2R(const SplineR2R& in) = default; - -template -inline void SplineR2R::set_spline(SingleSplineType* spline_r, - SingleSplineType* spline_i, - int twist, - int ispline, - int level) +template +SplineR2RT::SplineR2RT(const SplineR2RT& in) = default; + +template +inline void SplineR2RT::set_spline(SingleSplineType* spline_r, + SingleSplineType* spline_i, + int twist, + int ispline, + int level) { SplineInst->copy_spline(spline_r, ispline); } -template -bool SplineR2R::read_splines(hdf_archive& h5f) +template +bool SplineR2RT::read_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -bool SplineR2R::write_splines(hdf_archive& h5f) +template +bool SplineR2RT::write_splines(hdf_archive& h5f) { std::ostringstream o; - o << "spline_" << MyIndex; + o << "spline_" << this->MyIndex; einspline_engine bigtable(SplineInst->getSplinePtr()); return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0"); } -template -void SplineR2R::storeParamsBeforeRotation() +template +void SplineR2RT::storeParamsBeforeRotation() { const auto 
spline_ptr = SplineInst->getSplinePtr(); const auto coefs_tot_size = spline_ptr->coefs_size; @@ -79,7 +79,7 @@ void SplineR2R::storeParamsBeforeRotation() actual number of splined orbitals. This means that in practice rot_mat may be smaller than the number of 'columns' in the coefs array! - SplineR2R spl_coef layout: + SplineR2RT spl_coef layout: ^ | sp1 | ... | spN | pad | | |=====|=====|=====|=====| | | c11 | ... | c1N | 0 | @@ -102,8 +102,8 @@ void SplineR2R::storeParamsBeforeRotation() NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs "matrix" is very tall and skinny. */ -template -void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +template +void SplineR2RT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) { // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp const auto spline_ptr = SplineInst->getSplinePtr(); @@ -113,8 +113,8 @@ void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co const auto coefs_tot_size = spline_ptr->coefs_size; const auto BasisSetSize = coefs_tot_size / Nsplines; const auto TrueNOrbs = rot_mat.size1(); // == Nsplines - padding - assert(OrbitalSetSize == rot_mat.rows()); - assert(OrbitalSetSize == rot_mat.cols()); + assert(this->OrbitalSetSize == rot_mat.rows()); + assert(this->OrbitalSetSize == rot_mat.cols()); if (!use_stored_copy) { @@ -122,22 +122,24 @@ void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin()); } - if constexpr (std::is_same_v) { - //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster - BLAS::gemm('N', 'N', OrbitalSetSize, BasisSetSize, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize, - coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines); + // Here, ST should be equal to ValueType, which will be double for R2R. 
+ // Using BLAS to make things faster + BLAS::gemm('N', 'N', this->OrbitalSetSize, BasisSetSize, this->OrbitalSetSize, ST(1.0), rot_mat.data(), + this->OrbitalSetSize, coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines); } else { - //Here, ST is float but ValueType is double for R2R. Due to issues with type conversions, just doing naive matrix multiplication in this case to not lose precision on rot_mat + // Here, ST is float but ValueType is double for R2R. Due to issues with + // type conversions, just doing naive matrix multiplication in this case + // to not lose precision on rot_mat for (IndexType i = 0; i < BasisSetSize; i++) - for (IndexType j = 0; j < OrbitalSetSize; j++) + for (IndexType j = 0; j < this->OrbitalSetSize; j++) { const auto cur_elem = Nsplines * i + j; FullPrecValueType newval{0.}; - for (IndexType k = 0; k < OrbitalSetSize; k++) + for (IndexType k = 0; k < this->OrbitalSetSize; k++) { const auto index = i * Nsplines + k; newval += (*coef_copy_)[index] * rot_mat[k][j]; @@ -147,22 +149,21 @@ void SplineR2R::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co } } - -template -inline void SplineR2R::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) +template +inline void SplineR2RT::assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST signed_one = (bc_sign & 1) ? 
-1 : 1; #pragma omp simd for (size_t j = first; j < last; ++j) - psi[first_spo + j] = signed_one * myV[j]; + psi[this->first_spo + j] = signed_one * myV[j]; } -template -void SplineR2R::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) +template +void SplineR2RT::evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) { const PointType& r = P.activeR(iat); PointType ru; @@ -178,11 +179,11 @@ void SplineR2R::evaluateValue(const ParticleSet& P, const int iat, ValueVect } } -template -void SplineR2R::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) +template +void SplineR2RT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) { const bool need_resize = ratios_private.rows() < VP.getTotalNum(); @@ -192,13 +193,14 @@ void SplineR2R::evaluateDetRatios(const VirtualParticleSet& VP, // initialize thread private ratios if (need_resize) { - if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime + if (tid == 0) // just like #pragma omp master, but one fewer call to + // the runtime ratios_private.resize(VP.getTotalNum(), omp_get_num_threads()); #pragma omp barrier } int first, last; FairDivideAligned(psi.size(), getAlignment(), omp_get_num_threads(), tid, first, last); - const int last_real = kPoints.size() < last ? kPoints.size() : last; + const int last_real = this->kPoints.size() < last ? 
this->kPoints.size() : last; for (int iat = 0; iat < VP.getTotalNum(); ++iat) { @@ -221,16 +223,16 @@ void SplineR2R::evaluateDetRatios(const VirtualParticleSet& VP, } } -template -inline void SplineR2R::assign_vgl(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - int first, - int last) const +template +inline void SplineR2RT::assign_vgl(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST signed_one = (bc_sign & 1) ? -1 : 1; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), @@ -251,7 +253,7 @@ inline void SplineR2R::assign_vgl(int bc_sign, #pragma omp simd for (size_t j = first; j < last; ++j) { - const size_t psiIndex = first_spo + j; + const size_t psiIndex = this->first_spo + j; psi[psiIndex] = signed_one * myV[j]; dpsi[psiIndex][0] = signed_one * (g00 * g0[j] + g01 * g1[j] + g02 * g2[j]); dpsi[psiIndex][1] = signed_one * (g10 * g0[j] + g11 * g1[j] + g12 * g2[j]); @@ -260,10 +262,11 @@ inline void SplineR2R::assign_vgl(int bc_sign, } } -/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian - */ -template -inline void SplineR2R::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +/** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ +template +inline void SplineR2RT::assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { const ST signed_one = (bc_sign & 1) ? 
-1 : 1; const ST* restrict g0 = myG.data(0); @@ -271,9 +274,9 @@ inline void SplineR2R::assign_vgl_from_l(int bc_sign, ValueVector& psi, Grad const ST* restrict g2 = myG.data(2); #pragma omp simd - for (int psiIndex = first_spo; psiIndex < last_spo; ++psiIndex) + for (int psiIndex = this->first_spo; psiIndex < this->last_spo; ++psiIndex) { - const size_t j = psiIndex - first_spo; + const size_t j = psiIndex - this->first_spo; psi[psiIndex] = signed_one * myV[j]; dpsi[psiIndex][0] = signed_one * g0[j]; dpsi[psiIndex][1] = signed_one * g1[j]; @@ -282,12 +285,12 @@ inline void SplineR2R::assign_vgl_from_l(int bc_sign, ValueVector& psi, Grad } } -template -void SplineR2R::evaluateVGL(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SplineR2RT::evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { const PointType& r = P.activeR(iat); PointType ru; @@ -303,16 +306,16 @@ void SplineR2R::evaluateVGL(const ParticleSet& P, } } -template -void SplineR2R::assign_vgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - int first, - int last) const +template +void SplineR2RT::assign_vgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + int first, + int last) const { // protect last - last = last > kPoints.size() ? kPoints.size() : last; + last = last > this->kPoints.size() ? this->kPoints.size() : last; const ST signed_one = (bc_sign & 1) ? 
-1 : 1; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), @@ -332,12 +335,12 @@ void SplineR2R::assign_vgh(int bc_sign, #pragma omp simd for (size_t j = first; j < last; ++j) { - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = signed_one * myV[j]; dpsi[psiIndex][0] = signed_one * dX_r; dpsi[psiIndex][1] = signed_one * dY_r; @@ -365,12 +368,12 @@ void SplineR2R::assign_vgh(int bc_sign, } } -template -void SplineR2R::evaluateVGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) +template +void SplineR2RT::evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru; @@ -386,17 +389,17 @@ void SplineR2R::evaluateVGH(const ParticleSet& P, } } -template -void SplineR2R::assign_vghgh(int bc_sign, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi, - int first, - int last) const +template +void SplineR2RT::assign_vghgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first, + int last) const { // protect last - last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last); + last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last); const ST signed_one = (bc_sign & 1) ? 
-1 : 1; const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3), @@ -424,25 +427,25 @@ void SplineR2R::assign_vghgh(int bc_sign, const ST* restrict gh122 = mygH.data(8); const ST* restrict gh222 = mygH.data(9); - //SIMD doesn't work quite right yet. Comment out until further debugging. - //#pragma omp simd + // SIMD doesn't work quite right yet. Comment out until further debugging. + // #pragma omp simd for (size_t j = first; j < last; ++j) { const ST val_r = myV[j]; - - //dot(PrimLattice.G,myG[j]) + // dot(PrimLattice.G,myG[j]) const ST dX_r = g00 * g0[j] + g01 * g1[j] + g02 * g2[j]; const ST dY_r = g10 * g0[j] + g11 * g1[j] + g12 * g2[j]; const ST dZ_r = g20 * g0[j] + g21 * g1[j] + g22 * g2[j]; - const size_t psiIndex = j + first_spo; + const size_t psiIndex = j + this->first_spo; psi[psiIndex] = signed_one * val_r; dpsi[psiIndex][0] = signed_one * dX_r; dpsi[psiIndex][1] = signed_one * dY_r; dpsi[psiIndex][2] = signed_one * dZ_r; - //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates. + // intermediates for computation of hessian. \partial_i \partial_j phi + // in cartesian coordinates. 
const ST f_xx_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g00, g01, g02); const ST f_xy_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g10, g11, g12); const ST f_xz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g00, g01, g02, g20, g21, g22); @@ -451,11 +454,11 @@ void SplineR2R::assign_vghgh(int bc_sign, const ST f_zz_r = v_m_v(h00[j], h01[j], h02[j], h11[j], h12[j], h22[j], g20, g21, g22, g20, g21, g22); /* const ST h_xx_r=f_xx_r; - const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; - const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; - const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; - const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; - const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ + const ST h_xy_r=f_xy_r+(kX*dY_i+kY*dX_i)-kX*kY*val_r; + const ST h_xz_r=f_xz_r+(kX*dZ_i+kZ*dX_i)-kX*kZ*val_r; + const ST h_yy_r=f_yy_r+2*kY*dY_i-kY*kY*val_r; + const ST h_yz_r=f_yz_r+(kY*dZ_i+kZ*dY_i)-kY*kZ*val_r; + const ST h_zz_r=f_zz_r+2*kZ*dZ_i-kZ*kZ*val_r; */ grad_grad_psi[psiIndex][0] = f_xx_r * signed_one; grad_grad_psi[psiIndex][1] = f_xy_r * signed_one; @@ -464,12 +467,14 @@ void SplineR2R::assign_vghgh(int bc_sign, grad_grad_psi[psiIndex][5] = f_yz_r * signed_one; grad_grad_psi[psiIndex][8] = f_zz_r * signed_one; - //symmetry: + // symmetry: grad_grad_psi[psiIndex][3] = grad_grad_psi[psiIndex][1]; grad_grad_psi[psiIndex][6] = grad_grad_psi[psiIndex][2]; grad_grad_psi[psiIndex][7] = grad_grad_psi[psiIndex][5]; - //These are the real and imaginary components of the third SPO derivative. _xxx denotes - // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on. + // These are the real and imaginary components of the third SPO + // derivative. _xxx denotes + // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, + // and z, and so on. 
const ST f3_xxx_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g00, g01, g02, g00, g01, g02, g00, g01, g02); @@ -492,17 +497,23 @@ void SplineR2R::assign_vghgh(int bc_sign, const ST f3_zzz_r = t3_contract(gh000[j], gh001[j], gh002[j], gh011[j], gh012[j], gh022[j], gh111[j], gh112[j], gh122[j], gh222[j], g20, g21, g22, g20, g21, g22, g20, g21, g22); - //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) - /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - kX*kX*kX*val_i; - const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; - const ST gh_xxz_r= f3_xxz_r +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; - const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; - const ST gh_xyz_r= f3_xyz_r +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - kX*kY*kZ*val_i; - const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; - const ST gh_yyy_r= f3_yyy_r + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; - const ST gh_yyz_r= f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; - const ST gh_yzz_r= f3_yzz_r +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; - const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - kZ*kZ*kZ*val_i;*/ + // Here is where we build up the components of the physical hessian + // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r) + /* const ST gh_xxx_r= f3_xxx_r + 3*kX*f_xx_i - 3*kX*kX*dX_r - + kX*kX*kX*val_i; const ST gh_xxy_r= f3_xxy_r +(kY*f_xx_i+2*kX*f_xy_i) - + (kX*kX*dY_r+2*kX*kY*dX_r)-kX*kX*kY*val_i; const ST gh_xxz_r= f3_xxz_r + +(kZ*f_xx_i+2*kX*f_xz_i) - (kX*kX*dZ_r+2*kX*kZ*dX_r)-kX*kX*kZ*val_i; + const ST gh_xyy_r= f3_xyy_r +(2*kY*f_xy_i+kX*f_yy_i) - + (2*kX*kY*dY_r+kY*kY*dX_r)-kX*kY*kY*val_i; const ST gh_xyz_r= 
f3_xyz_r + +(kX*f_yz_i+kY*f_xz_i+kZ*f_xy_i)-(kX*kY*dZ_r+kY*kZ*dX_r+kZ*kX*dY_r) - + kX*kY*kZ*val_i; const ST gh_xzz_r= f3_xzz_r +(2*kZ*f_xz_i+kX*f_zz_i) - + (2*kX*kZ*dZ_r+kZ*kZ*dX_r)-kX*kZ*kZ*val_i; const ST gh_yyy_r= f3_yyy_r + + 3*kY*f_yy_i - 3*kY*kY*dY_r - kY*kY*kY*val_i; const ST gh_yyz_r= + f3_yyz_r +(kZ*f_yy_i+2*kY*f_yz_i) - + (kY*kY*dZ_r+2*kY*kZ*dY_r)-kY*kY*kZ*val_i; const ST gh_yzz_r= f3_yzz_r + +(2*kZ*f_yz_i+kY*f_zz_i) - (2*kY*kZ*dZ_r+kZ*kZ*dY_r)-kY*kZ*kZ*val_i; + const ST gh_zzz_r= f3_zzz_r + 3*kZ*f_zz_i - 3*kZ*kZ*dZ_r - + kZ*kZ*kZ*val_i;*/ //[x][xx] //These are the unique entries grad_grad_grad_psi[psiIndex][0][0] = signed_one * f3_xxx_r; grad_grad_grad_psi[psiIndex][0][1] = signed_one * f3_xxy_r; @@ -511,12 +522,12 @@ void SplineR2R::assign_vghgh(int bc_sign, grad_grad_grad_psi[psiIndex][0][5] = signed_one * f3_xyz_r; grad_grad_grad_psi[psiIndex][0][8] = signed_one * f3_xzz_r; - //filling in the symmetric terms. Filling out the xij terms + // filling in the symmetric terms. Filling out the xij terms grad_grad_grad_psi[psiIndex][0][3] = grad_grad_grad_psi[psiIndex][0][1]; grad_grad_grad_psi[psiIndex][0][6] = grad_grad_grad_psi[psiIndex][0][2]; grad_grad_grad_psi[psiIndex][0][7] = grad_grad_grad_psi[psiIndex][0][5]; - //Now for everything that's a permutation of the above: + // Now for everything that's a permutation of the above: grad_grad_grad_psi[psiIndex][1][0] = grad_grad_grad_psi[psiIndex][0][1]; grad_grad_grad_psi[psiIndex][1][1] = grad_grad_grad_psi[psiIndex][0][4]; grad_grad_grad_psi[psiIndex][1][2] = grad_grad_grad_psi[psiIndex][0][5]; @@ -542,13 +553,13 @@ void SplineR2R::assign_vghgh(int bc_sign, } } -template -void SplineR2R::evaluateVGHGH(const ParticleSet& P, - const int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) +template +void SplineR2RT::evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& 
grad_grad_grad_psi) { const PointType& r = P.activeR(iat); PointType ru; @@ -564,7 +575,9 @@ void SplineR2R::evaluateVGHGH(const ParticleSet& P, } } -template class SplineR2R; -template class SplineR2R; +template class SplineR2RT; +template class SplineR2RT; // do we need this one? +template class SplineR2RT; +template class SplineR2RT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h new file mode 100644 index 0000000000..38043227ee --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h @@ -0,0 +1,228 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPLINE_R2RT_H +#define QMCPLUSPLUS_SPLINE_R2RT_H + +#include "OhmmsSoA/VectorSoaContainer.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "Utilities/FairDivide.h" +#include "spline2/MultiBspline.hpp" + +#include + +namespace qmcplusplus +{ +/** class to match ST real spline with BsplineSet::ValueType (real) SPOs + * @tparam ST precision of spline + * + * Requires temporage storage and multiplication of the sign of the real part of + * the phase Internal storage ST type arrays are aligned and padded. 
+ */ +template +class SplineR2RT : public BsplineSetT +{ +public: + using SplineType = typename bspline_traits::SplineType; + using BCType = typename bspline_traits::BCType; + using DataType = ST; + using RealType = typename SPOSetT::RealType; + using IndexType = typename SPOSetT::IndexType; + using FullPrecValueType = double; + using PointType = TinyVector; + using SingleSplineType = UBspline_3d_d; + + // types for evaluation results + using TT = typename BsplineSetT::ValueType; + using GGGVector = typename BsplineSetT::GGGVector; + using ValueMatrix = typename BsplineSetT::ValueMatrix; + using GradVector = typename BsplineSetT::GradVector; + using HessVector = typename BsplineSetT::HessVector; + using ValueVector = typename BsplineSetT::ValueVector; + + using vContainer_type = Vector>; + using gContainer_type = VectorSoaContainer; + using hContainer_type = VectorSoaContainer; + using ghContainer_type = VectorSoaContainer; + +private: + bool IsGamma; + ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to + /// CartesianUnit, e.g. 
Hessian + Tensor GGt; + /// multi bspline set + std::shared_ptr> SplineInst; + + /// Copy of original splines for orbital rotation + std::shared_ptr> coef_copy_; + + /// thread private ratios for reduction when using nested threading, numVP x + /// numThread + Matrix ratios_private; + +protected: + /// primitive cell + CrystalLattice PrimLattice; + /// intermediate result vectors + vContainer_type myV; + vContainer_type myL; + gContainer_type myG; + hContainer_type myH; + ghContainer_type mygH; + +public: + SplineR2RT(const std::string& my_name) : BsplineSetT(my_name) {} + + SplineR2RT(const SplineR2RT& in); + virtual std::string getClassName() const override { return "SplineR2RT"; } + virtual std::string getKeyword() const override { return "SplineR2RT"; } + bool isComplex() const override { return false; }; + bool isRotationSupported() const override { return true; } + + std::unique_ptr> makeClone() const override { return std::make_unique>(*this); } + + /// Store an original copy of the spline coefficients for orbital rotation + void storeParamsBeforeRotation() override; + + /* + Implements orbital rotations via [1,2]. + Should be called by RotatedSPOs::apply_rotation() + + This implementation requires that NSPOs > Nelec. In other words, + if you want to run a orbopt wfn, you must include some virtual orbitals! + + Some results (using older Berkeley branch) were published in [3]. 
+ + [1] Filippi & Fahy, JCP 112, (2000) + [2] Toulouse & Umrigar, JCP 126, (2007) + [3] Townsend et al., PRB 102, (2020) + */ + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override; + + inline void resizeStorage(size_t n, size_t nvals) + { + this->init_base(n); + const size_t npad = getAlignedSize(n); + this->myV.resize(npad); + this->myG.resize(npad); + this->myL.resize(npad); + this->myH.resize(npad); + this->mygH.resize(npad); + + IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0)); + } + + void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } + + void gather_tables(Communicate* comm) + { + if (comm->size() == 1) + return; + const int Nbands = this->kPoints.size(); + const int Nbandgroups = comm->size(); + this->offset.resize(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, this->offset); + gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset); + } + + template + void create_spline(GT& xyz_g, BCT& xyz_bc) + { + GGt = dot(transpose(PrimLattice.G), PrimLattice.G); + SplineInst = std::make_shared>(); + SplineInst->create(xyz_g, xyz_bc, myV.size()); + + app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " + << "for the coefficients in 3D spline orbital representation" << std::endl; + } + + inline void flush_zero() { SplineInst->flush_zero(); } + + void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); + + bool read_splines(hdf_archive& h5f); + + bool write_splines(hdf_archive& h5f); + + /** convert position in PrimLattice unit and return sign */ + inline int convertPos(const PointType& r, PointType& ru) + { + ru = PrimLattice.toUnit(r); + int bc_sign = 0; + for (int i = 0; i < this->D; i++) + if (-std::numeric_limits::epsilon() < ru[i] && ru[i] < 0) + ru[i] = ST(0.0); + else + { + ST img = std::floor(ru[i]); + ru[i] -= img; + bc_sign += 
this->HalfG[i] * (int)img; + } + return bc_sign; + } + + void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const; + + void evaluateValue(const ParticleSetT& P, const int iat, ValueVector& psi) override; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override; + + void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const; + + /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in + * cartesian + */ + void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluateVGL(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) override; + + void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) + const; + + void evaluateVGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override; + + void assign_vghgh(int bc_sign, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi, + int first = 0, + int last = -1) const; + + void evaluateVGHGH(const ParticleSetT& P, + const int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override; + + template + friend class SplineSetReaderT; + template + friend class BsplineReaderBaseT; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h index dbeb68ff3c..4758342104 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h @@ -25,270 +25,14 @@ */ #ifndef QMCPLUSPLUS_SPLINESET_READER_H #define QMCPLUSPLUS_SPLINESET_READER_H -#include "mpi/collectives.h" 
-#include "mpi/point2point.h" -#include "Utilities/FairDivide.h" + +#include "Configuration.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" namespace qmcplusplus { -/** General SplineSetReader to handle any unitcell - */ -template -struct SplineSetReader : public BsplineReaderBase -{ - using splineset_t = SA; - using DataType = typename splineset_t::DataType; - using SplineType = typename splineset_t::SplineType; - - Array, 3> FFTbox; - Array splineData_r, splineData_i; - double rotate_phase_r, rotate_phase_i; - UBspline_3d_d* spline_r; - UBspline_3d_d* spline_i; - splineset_t* bspline; - fftw_plan FFTplan; - - SplineSetReader(EinsplineSetBuilder* e) - : BsplineReaderBase(e), spline_r(nullptr), spline_i(nullptr), bspline(nullptr), FFTplan(nullptr) - {} - - ~SplineSetReader() override { clear(); } - - void clear() - { - einspline::destroy(spline_r); - einspline::destroy(spline_i); - if (FFTplan != nullptr) - fftw_destroy_plan(FFTplan); - FFTplan = nullptr; - } - - // set info for Hybrid - virtual void initialize_hybridrep_atomic_centers() {} - // transform cG to radial functions - virtual void create_atomic_centers_Gspace(Vector>& cG, Communicate& band_group_comm, int iorb) {} - - std::unique_ptr create_spline_set(const std::string& my_name, - int spin, - const BandInfoGroup& bandgroup) override - { - ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); - //Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, c_init; - //double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; - bspline = new splineset_t(my_name); - app_log() << " ClassName = " << bspline->getClassName() << std::endl; - if (bspline->isComplex()) - app_log() << " Using complex einspline table" << std::endl; - else - app_log() << " Using real einspline table" << std::endl; - - // set info for Hybrid - this->initialize_hybridrep_atomic_centers(); - - //baseclass handles twists - check_twists(bspline, 
bandgroup); - - Ugrid xyz_grid[3]; - - typename splineset_t::BCType xyz_bc[3]; - bool havePsig = set_grid(bspline->HalfG, xyz_grid, xyz_bc); - if (!havePsig) - myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\"."); - bspline->create_spline(xyz_grid, xyz_bc); - - std::ostringstream oo; - oo << bandgroup.myName << ".g" << MeshSize[0] << "x" << MeshSize[1] << "x" << MeshSize[2] << ".h5"; - - const std::string splinefile(oo.str()); - bool root = (myComm->rank() == 0); - int foundspline = 0; - Timer now; - if (root) - { - now.restart(); - hdf_archive h5f(myComm); - foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); - if (foundspline) - { - std::string aname("none"); - foundspline = h5f.readEntry(aname, "class_name"); - foundspline = (aname.find(bspline->getKeyword()) != std::string::npos); - } - if (foundspline) - { - int sizeD = 0; - foundspline = h5f.readEntry(sizeD, "sizeof"); - foundspline = (sizeD == sizeof(typename splineset_t::DataType)); - } - if (foundspline) - { - foundspline = bspline->read_splines(h5f); - if (foundspline) - app_log() << " Successfully restored coefficients from " << splinefile << ". The reading time is " - << now.elapsed() << " sec." << std::endl; - } - h5f.close(); - } - myComm->bcast(foundspline); - if (foundspline) - { - now.restart(); - bspline->bcast_tables(myComm); - app_log() << " SplineSetReader bcast the full table " << now.elapsed() << " sec." 
<< std::endl; - app_log().flush(); - } - else - { - bspline->flush_zero(); - - int nx = MeshSize[0]; - int ny = MeshSize[1]; - int nz = MeshSize[2]; - if (havePsig) //perform FFT using FFTW - { - FFTbox.resize(nx, ny, nz); - FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), - reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); - splineData_r.resize(nx, ny, nz); - if (bspline->isComplex()) - splineData_i.resize(nx, ny, nz); - - TinyVector start(0.0); - TinyVector end(1.0); - spline_r = einspline::create(spline_r, start, end, MeshSize, bspline->HalfG); - if (bspline->isComplex()) - spline_i = einspline::create(spline_i, start, end, MeshSize, bspline->HalfG); - - now.restart(); - initialize_spline_pio_gather(spin, bandgroup); - app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; - - fftw_destroy_plan(FFTplan); - FFTplan = NULL; - } - else //why, don't know - initialize_spline_psi_r(spin, bandgroup); - if (saveSplineCoefs && root) - { - now.restart(); - hdf_archive h5f; - h5f.create(splinefile); - std::string classname = bspline->getClassName(); - h5f.write(classname, "class_name"); - int sizeD = sizeof(typename splineset_t::DataType); - h5f.write(sizeD, "sizeof"); - bspline->write_splines(h5f); - h5f.close(); - app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " - << now.elapsed() << " sec." 
<< std::endl; - } - } - - clear(); - return std::unique_ptr{bspline}; - } - - /** fft and spline cG - * @param cG psi_g to be processed - * @param ti twist index - * @param iorb orbital index - * - * Perform FFT and spline to spline_r and spline_i - */ - inline void fft_spline(Vector>& cG, int ti) - { - unpack4fftw(cG, mybuilder->Gvecs[0], MeshSize, FFTbox); - fftw_execute(FFTplan); - if (bspline->isComplex()) - { - if (rotate) - fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, mybuilder->primcell_kpoints[ti], rotate_phase_r, - rotate_phase_i); - else - { - split_real_components_c2c(FFTbox, splineData_r, splineData_i); - rotate_phase_r = 1.0; - rotate_phase_i = 0.0; - } - einspline::set(spline_r, splineData_r.data()); - einspline::set(spline_i, splineData_i.data()); - } - else - { - fix_phase_rotate_c2r(FFTbox, splineData_r, mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i); - einspline::set(spline_r, splineData_r.data()); - } - } - - - /** initialize the splines - */ - void initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) - { - //distribute bands over processor groups - int Nbands = bandgroup.getNumDistinctOrbitals(); - const int Nprocs = myComm->size(); - const int Nbandgroups = std::min(Nbands, Nprocs); - Communicate band_group_comm(*myComm, Nbandgroups); - std::vector band_groups(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, band_groups); - int iorb_first = band_groups[band_group_comm.getGroupID()]; - int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; - - app_log() << "Start transforming plane waves to 3D B-Splines." 
<< std::endl; - hdf_archive h5f(&band_group_comm, false); - Vector> cG(mybuilder->Gvecs[0].size()); - const std::vector& cur_bands = bandgroup.myBands; - if (band_group_comm.isGroupLeader()) - h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); - for (int iorb = iorb_first; iorb < iorb_last; iorb++) - { - if (band_group_comm.isGroupLeader()) - { - int iorb_h5 = bspline->BandIndexMap[iorb]; - int ti = cur_bands[iorb_h5].TwistIndex; - std::string s = psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); - if (!h5f.readEntry(cG, s)) - { - std::ostringstream msg; - msg << "SplineSetReader Failed to read band(s) from h5 file. " - << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; - throw std::runtime_error(msg.str()); - } - double total_norm = compute_norm(cG); - if ((checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) - { - std::ostringstream msg; - msg << "SplineSetReader The orbital " << iorb_h5 << " has a wrong norm " << total_norm - << ", computed from plane wave coefficients!" << std::endl - << "This may indicate a problem with the HDF5 library versions used " - << "during wavefunction conversion or read." 
<< std::endl; - throw std::runtime_error(msg.str()); - } - fft_spline(cG, ti); - bspline->set_spline(spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); - } - this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); - } - - myComm->barrier(); - Timer now; - if (band_group_comm.isGroupLeader()) - { - now.restart(); - bspline->gather_tables(band_group_comm.getGroupLeaderComm()); - app_log() << " Time to gather the table = " << now.elapsed() << std::endl; - } - now.restart(); - bspline->bcast_tables(myComm); - app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; - } +template +using SplineSetReader = SplineSetReaderT; - void initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) - { - // old implementation buried in the history - myComm->barrier_and_abort("SplineSetReaderP initialize_spline_psi_r implementation not finished."); - } -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h new file mode 100644 index 0000000000..54ce28f96c --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h @@ -0,0 +1,295 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPLINESET_READERT_H +#define QMCPLUSPLUS_SPLINESET_READERT_H +#include "BsplineFactory/BsplineReaderBaseT.h" +#include "Utilities/FairDivide.h" +#include "mpi/collectives.h" +#include "mpi/point2point.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "fftw3.h" + +namespace qmcplusplus +{ +/** General SplineSetReader to handle any unitcell + */ +template +class SplineSetReaderT : public BsplineReaderBaseT +{ +public: + using splineset_t = SA; + using DataType = typename splineset_t::DataType; + using SplineType = typename splineset_t::SplineType; + using ValueType = typename splineset_t::ValueType; + + Array, 3> FFTbox; + Array splineData_r, splineData_i; + double rotate_phase_r, rotate_phase_i; + UBspline_3d_d* spline_r; + UBspline_3d_d* spline_i; + splineset_t* bspline; + fftw_plan FFTplan; + + SplineSetReaderT(EinsplineSetBuilderT* e) + : BsplineReaderBaseT(e), spline_r(nullptr), spline_i(nullptr), bspline(nullptr), FFTplan(nullptr) + {} + + ~SplineSetReaderT() override { clear(); } + + void clear() + { + einspline::destroy(spline_r); + einspline::destroy(spline_i); + if (FFTplan != nullptr) + fftw_destroy_plan(FFTplan); + FFTplan = nullptr; + } + + // set info for Hybrid + virtual void initialize_hybridrep_atomic_centers() {} + // transform cG to radial functions + virtual void create_atomic_centers_Gspace(Vector>& cG, Communicate& band_group_comm, int iorb) {} + + std::unique_ptr> create_spline_set(const std::string& my_name, + int spin, + const BandInfoGroup& bandgroup) override + { + ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)"); + // Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5, + 
// c_init; double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0, + // t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0; + bspline = new splineset_t(my_name); + app_log() << " ClassName = " << bspline->getClassName() << std::endl; + if (bspline->isComplex()) + app_log() << " Using complex einspline table" << std::endl; + else + app_log() << " Using real einspline table" << std::endl; + + // set info for Hybrid + this->initialize_hybridrep_atomic_centers(); + + // baseclass handles twists + this->check_twists(bspline, bandgroup); + + Ugrid xyz_grid[3]; + + typename splineset_t::BCType xyz_bc[3]; + bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc); + if (!havePsig) + this->myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\"."); + bspline->create_spline(xyz_grid, xyz_bc); + + std::ostringstream oo; + oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x" << this->MeshSize[1] << "x" << this->MeshSize[2] + << ".h5"; + + const std::string splinefile(oo.str()); + bool root = (this->myComm->rank() == 0); + int foundspline = 0; + Timer now; + if (root) + { + now.restart(); + hdf_archive h5f(this->myComm); + foundspline = h5f.open(splinefile, H5F_ACC_RDONLY); + if (foundspline) + { + std::string aname("none"); + foundspline = h5f.readEntry(aname, "class_name"); + foundspline = (aname.find(bspline->getKeyword()) != std::string::npos); + } + if (foundspline) + { + int sizeD = 0; + foundspline = h5f.readEntry(sizeD, "sizeof"); + foundspline = (sizeD == sizeof(DataType)); + } + if (foundspline) + { + foundspline = bspline->read_splines(h5f); + if (foundspline) + app_log() << " Successfully restored coefficients from " << splinefile << ". The reading time is " + << now.elapsed() << " sec." << std::endl; + } + h5f.close(); + } + this->myComm->bcast(foundspline); + if (foundspline) + { + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " SplineSetReader bcast the full table " << now.elapsed() << " sec." 
<< std::endl; + app_log().flush(); + } + else + { + bspline->flush_zero(); + + int nx = this->MeshSize[0]; + int ny = this->MeshSize[1]; + int nz = this->MeshSize[2]; + if (havePsig) // perform FFT using FFTW + { + FFTbox.resize(nx, ny, nz); + FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), + reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); + splineData_r.resize(nx, ny, nz); + if (bspline->isComplex()) + splineData_i.resize(nx, ny, nz); + + TinyVector start(0.0); + TinyVector end(1.0); + spline_r = einspline::create(spline_r, start, end, this->MeshSize, bspline->HalfG); + if (bspline->isComplex()) + spline_i = einspline::create(spline_i, start, end, this->MeshSize, bspline->HalfG); + + now.restart(); + initialize_spline_pio_gather(spin, bandgroup); + app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; + + fftw_destroy_plan(FFTplan); + FFTplan = NULL; + } + else // why, don't know + initialize_spline_psi_r(spin, bandgroup); + if (this->saveSplineCoefs && root) + { + now.restart(); + hdf_archive h5f; + h5f.create(splinefile); + std::string classname = bspline->getClassName(); + h5f.write(classname, "class_name"); + int sizeD = sizeof(DataType); + h5f.write(sizeD, "sizeof"); + bspline->write_splines(h5f); + h5f.close(); + app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " + << now.elapsed() << " sec." 
<< std::endl; + } + } + + clear(); + return std::unique_ptr>{bspline}; + } + + /** fft and spline cG + * @param cG psi_g to be processed + * @param ti twist index + * @param iorb orbital index + * + * Perform FFT and spline to spline_r and spline_i + */ + inline void fft_spline(Vector>& cG, int ti) + { + unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox); + fftw_execute(FFTplan); + if (bspline->isComplex()) + { + if (this->rotate) + fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, this->mybuilder->primcell_kpoints[ti], rotate_phase_r, + rotate_phase_i); + else + { + split_real_components_c2c(FFTbox, splineData_r, splineData_i); + rotate_phase_r = 1.0; + rotate_phase_i = 0.0; + } + einspline::set(spline_r, splineData_r.data()); + einspline::set(spline_i, splineData_i.data()); + } + else + { + fix_phase_rotate_c2r(FFTbox, splineData_r, this->mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i); + einspline::set(spline_r, splineData_r.data()); + } + } + + /** initialize the splines + */ + void initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup) + { + // distribute bands over processor groups + int Nbands = bandgroup.getNumDistinctOrbitals(); + const int Nprocs = this->myComm->size(); + const int Nbandgroups = std::min(Nbands, Nprocs); + Communicate band_group_comm(*this->myComm, Nbandgroups); + std::vector band_groups(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, band_groups); + int iorb_first = band_groups[band_group_comm.getGroupID()]; + int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + + app_log() << "Start transforming plane waves to 3D B-Splines." 
<< std::endl; + hdf_archive h5f(&band_group_comm, false); + Vector> cG(this->mybuilder->Gvecs[0].size()); + const std::vector& cur_bands = bandgroup.myBands; + if (band_group_comm.isGroupLeader()) + h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY); + for (int iorb = iorb_first; iorb < iorb_last; iorb++) + { + if (band_group_comm.isGroupLeader()) + { + int iorb_h5 = bspline->BandIndexMap[iorb]; + int ti = cur_bands[iorb_h5].TwistIndex; + std::string s = this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex); + if (!h5f.readEntry(cG, s)) + { + std::ostringstream msg; + msg << "SplineSetReader Failed to read band(s) from h5 " + "file. " + << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; + throw std::runtime_error(msg.str()); + } + double total_norm = compute_norm(cG); + if ((this->checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) + { + std::ostringstream msg; + msg << "SplineSetReader The orbital " << iorb_h5 << " has a wrong norm " << total_norm + << ", computed from plane wave coefficients!" << std::endl + << "This may indicate a problem with the HDF5 library " + "versions used " + << "during wavefunction conversion or read." 
<< std::endl; + throw std::runtime_error(msg.str()); + } + fft_spline(cG, ti); + bspline->set_spline(spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0); + } + this->create_atomic_centers_Gspace(cG, band_group_comm, iorb); + } + + this->myComm->barrier(); + Timer now; + if (band_group_comm.isGroupLeader()) + { + now.restart(); + bspline->gather_tables(band_group_comm.getGroupLeaderComm()); + app_log() << " Time to gather the table = " << now.elapsed() << std::endl; + } + now.restart(); + bspline->bcast_tables(this->myComm); + app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; + } + + void initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup) + { + // old implementation buried in the history + this->myComm->barrier_and_abort("SplineSetReaderP initialize_spline_psi_r " + "implementation not finished."); + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h index e8a9a4972c..cce9148653 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h @@ -13,42 +13,6 @@ #ifndef QMCPLUSPLUS_CREATE_BSPLINE_READER_H #define QMCPLUSPLUS_CREATE_BSPLINE_READER_H -#include -#include +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" -namespace qmcplusplus -{ -///forward declaration -struct BsplineReaderBase; -class EinsplineSetBuilder; - -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is double - */ -std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is float - */ -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - 
const std::string& useGPU); - -/** create a reader which handles real splines, R2R case - * spline storage and computation precision is double - */ -std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -/** create a reader which handles real splines, R2R case - * spline storage and computation precision is float - */ -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); - -} // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp new file mode 100644 index 0000000000..a2d8ed415c --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp @@ -0,0 +1,311 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
+////////////////////////////////////////////////////////////////////////////////////// + +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" + +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepRealT.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2CT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineC2RT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineR2RT.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include +#include + +namespace qmcplusplus +{ +template +struct CreateComplexHelper +{ + static inline std::unique_ptr> createDouble(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) + { + using RealType = typename EinsplineSetBuilderT::RealType; + std::unique_ptr> aReader; + + app_summary() << " Using real valued spline SPOs with complex double " + "precision storage (C2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + { + app_summary() << " Running OpenMP offload code path." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + else + { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." 
<< std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + + return aReader; + } + + static inline std::unique_ptr> createSingle(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) + { + using RealType = typename EinsplineSetBuilderT::RealType; + std::unique_ptr> aReader; + + app_summary() << " Using real valued spline SPOs with complex single " + "precision storage (C2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + { + app_summary() << " Running OpenMP offload code path." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + else + { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + + return aReader; + } +}; + +template +struct CreateComplexHelper> +{ + using ValueType = std::complex; + using RealType = typename EinsplineSetBuilderT::RealType; + + static inline std::unique_ptr> createDouble(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) + { + std::unique_ptr> aReader; + + app_summary() << " Using complex valued spline SPOs with complex double " + "precision storage (C2C)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + { + app_summary() << " Running OpenMP offload code path." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + else + { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." 
<< std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + + return aReader; + } + + static inline std::unique_ptr> createSingle(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) + { + std::unique_ptr> aReader; + + app_summary() << " Using complex valued spline SPOs with complex single " + "precision storage (C2C)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + { + app_summary() << " Running OpenMP offload code path." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + else + { + app_summary() << " Running on CPU." << std::endl; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + } + + return aReader; + } +}; + +template +std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) +{ + return CreateComplexHelper::createDouble(e, hybrid_rep, useGPU); +} +#ifdef QMC_COMPLEX +template std::unique_ptr>> createBsplineComplexDoubleT>( + EinsplineSetBuilderT>* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr>> createBsplineComplexDoubleT>( + EinsplineSetBuilderT>* e, + bool hybrid_rep, + const std::string& useGPU); + +#endif +template std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> createBsplineComplexDoubleT( + EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template +std::unique_ptr> createBsplineComplexSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) +{ + return CreateComplexHelper::createSingle(e, hybrid_rep, useGPU); +} + +#ifdef 
QMC_COMPLEX +template std::unique_ptr>> createBsplineComplexSingleT>( + EinsplineSetBuilderT>* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr>> createBsplineComplexSingleT>( + EinsplineSetBuilderT>* e, + bool hybrid_rep, + const std::string& useGPU); +#endif + +template std::unique_ptr> createBsplineComplexSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> createBsplineComplexSingleT( + EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template +std::unique_ptr> createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) +{ + app_summary() << " Using real valued spline SPOs with real double " + "precision storage (R2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + app_summary() << "OpenMP offload has not been implemented to support " + "real valued spline SPOs with real storage!" + << std::endl; + app_summary() << " Running on CPU." << std::endl; + + std::unique_ptr> aReader; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + return aReader; +} + +template std::unique_ptr> createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template +std::unique_ptr> createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU) +{ + app_summary() << " Using real valued spline SPOs with real single " + "precision storage (R2R)." + << std::endl; + if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) + app_summary() << "OpenMP offload has not been implemented to support " + "real valued spline SPOs with real storage!" 
+ << std::endl; + app_summary() << " Running on CPU." << std::endl; + + std::unique_ptr> aReader; + if (hybrid_rep) + { + app_summary() << " Using hybrid orbital representation." << std::endl; + aReader = std::make_unique>>>(e); + } + else + aReader = std::make_unique>>(e); + return aReader; +} + +template std::unique_ptr> createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +template std::unique_ptr> createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h new file mode 100644 index 0000000000..fe9096c778 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h @@ -0,0 +1,59 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2019 QMCPACK developers. 
+// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_CREATE_BSPLINE_READERT_H +#define QMCPLUSPLUS_CREATE_BSPLINE_READERT_H + +#include +#include + +namespace qmcplusplus +{ +/// forward declaration +template +class BsplineReaderBaseT; +template +class EinsplineSetBuilderT; + +/** create a reader which handles complex (double size real) splines, C2R or C2C + * case; spline storage and computation precision is double + */ +template +std::unique_ptr> createBsplineComplexDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +/** create a reader which handles complex (double size real) splines, C2R or C2C + * case; spline storage and computation precision is float + */ +template +std::unique_ptr> createBsplineComplexSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +/** create a reader which handles real splines, R2R case + * spline storage and computation precision is double + */ +template +std::unique_ptr> createBsplineRealDoubleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +/** create a reader which handles real splines, R2R case + * spline storage and computation precision is float + */ +template +std::unique_ptr> createBsplineRealSingleT(EinsplineSetBuilderT* e, + bool hybrid_rep, + const std::string& useGPU); + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp index 202c79f469..3f21d17818 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp @@ -23,16 +23,16 @@ #include "SplineC2COMPTarget.h" #include "HybridRepCplx.h" #include -#include "einspline_helper.hpp" 
-#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineComplexDoubleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { using RealType = OHMMS_PRECISION; std::unique_ptr aReader; @@ -45,7 +45,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -56,7 +56,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -69,7 +69,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -80,7 +80,7 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp index 9711a404fe..021ac94bca 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp @@ -23,16 +23,16 @@ #include "SplineC2COMPTarget.h" #include "HybridRepCplx.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineComplexSingleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { using RealType = OHMMS_PRECISION; std::unique_ptr aReader; @@ -45,7 +45,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -56,7 +56,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -69,7 +69,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); @@ -80,7 +80,7 @@ std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilde if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp index 1dfd43d5f2..08089fbc6c 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp @@ -18,10 +18,10 @@ #include "SplineR2R.h" #include "HybridRepReal.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { @@ -39,7 +39,7 @@ std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp index 5b5a3a2924..9d6299bd65 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp @@ -18,16 +18,16 @@ #include "SplineR2R.h" #include "HybridRepReal.h" #include -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h" +#include "QMCWaveFunctions/BsplineFactory/SplineSetReader.h" +#include "QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h" namespace qmcplusplus { -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +std::unique_ptr createBsplineRealSingleT(EinsplineSetBuilder* e, + bool hybrid_rep, + const std::string& useGPU) { app_summary() << " Using real valued spline SPOs with real single precision storage (R2R)." << std::endl; if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) @@ -39,7 +39,7 @@ std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." 
<< std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else aReader = std::make_unique>>(e); diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 99e5675209..96e3c01ed1 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -23,29 +23,24 @@ add_subdirectory(detail) set(WFBASE_SRCS OptimizableFunctorBase.cpp - VariableSet.cpp NaNguard.cpp + VariableSetT.cpp WaveFunctionPool.cpp WaveFunctionComponent.cpp WaveFunctionComponentBuilder.cpp - SPOSetBuilder.cpp + SPOSetBuilderT.cpp SPOInfo.cpp SPOSetInfo.cpp SPOSetInputInfo.cpp - SPOSet.cpp - CompositeSPOSet.cpp - HarmonicOscillator/SHOSet.cpp - HarmonicOscillator/SHOSetBuilder.cpp + SPOSetT.cpp + CompositeSPOSetT.cpp + HarmonicOscillator/SHOSetT.cpp + HarmonicOscillator/SHOSetBuilderT.cpp ExampleHeBuilder.cpp - ExampleHeComponent.cpp) + ExampleHeComponent.cpp + RotatedSPOsT.cpp + SpinorSetT.cpp) -if(NOT QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} RotatedSPOs.cpp) -endif(NOT QMC_COMPLEX) - -if(QMC_COMPLEX) - set(WFBASE_SRCS ${WFBASE_SRCS} SpinorSet.cpp) -endif(QMC_COMPLEX) ######################## # build jastrows ######################## @@ -63,48 +58,73 @@ set(JASTROW_SRCS set(JASTROW_OMPTARGET_SRCS Jastrow/TwoBodyJastrow.cpp Jastrow/BsplineFunctor.cpp) -set(FERMION_SRCS ${FERMION_SRCS} ElectronGas/FreeOrbital.cpp ElectronGas/FreeOrbitalBuilder.cpp) +set(FERMION_SRCS ${FERMION_SRCS} + ElectronGas/FreeOrbitalT.cpp + ElectronGas/FreeOrbitalBuilderT.cpp) # wavefunctions only availbale to 3-dim problems if(OHMMS_DIM MATCHES 3) set(JASTROW_SRCS ${JASTROW_SRCS} Jastrow/eeI_JastrowBuilder.cpp Jastrow/CountingJastrowBuilder.cpp) - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSet.cpp LCAO/LCAOrbitalBuilder.cpp LCAO/MultiQuinticSpline1D.cpp - LCAO/AOBasisBuilder.cpp LCAO/SoaLocalizedBasisSet.cpp) - if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp) - else(QMC_COMPLEX) + set(FERMION_SRCS 
${FERMION_SRCS} + LCAO/LCAOrbitalSetT.cpp + LCAO/LCAOrbitalBuilderT.cpp + LCAO/MultiQuinticSpline1D.cpp + LCAO/AOBasisBuilderT.cpp + LCAO/SoaLocalizedBasisSet.cpp + LCAO/LCAOSpinorBuilderT.cpp + LCAO/SoaCuspCorrectionT.cpp) + if(NOT QMC_COMPLEX) #LCAO cusp correction is not ready for complex - set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOrbitalSetWithCorrection.cpp - LCAO/CuspCorrectionConstruction.cpp LCAO/SoaCuspCorrection.cpp) - endif(QMC_COMPLEX) + set(FERMION_SRCS ${FERMION_SRCS} + LCAO/LCAOrbitalSetWithCorrectionT.cpp + LCAO/CuspCorrectionConstructionT.cpp) + endif() if(HAVE_EINSPLINE) set(FERMION_SRCS ${FERMION_SRCS} - BsplineFactory/EinsplineSetBuilderCommon.cpp - BsplineFactory/EinsplineSetBuilderESHDF.fft.cpp - BsplineFactory/EinsplineSetBuilder_createSPOs.cpp + EinsplineSetBuilderT.cpp + BsplineFactory/createBsplineReaderT.cpp BsplineFactory/createComplexDouble.cpp BsplineFactory/createComplexSingle.cpp - BsplineFactory/HybridRepCenterOrbitals.cpp + BsplineFactory/HybridRepCenterOrbitalsT.cpp BandInfo.cpp - BsplineFactory/BsplineReaderBase.cpp) - set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp) + BsplineFactory/SplineC2RT.cpp + BsplineFactory/SplineR2RT.cpp + BsplineFactory/SplineC2CT.cpp + BsplineFactory/BsplineReaderBaseT.cpp) + set(FERMION_OMPTARGET_SRCS + Fermion/DiracDeterminantBatched.cpp + Fermion/MultiDiracDeterminant.2.cpp + BsplineFactory/SplineC2ROMPTargetT.cpp + BsplineFactory/SplineC2COMPTargetT.cpp + ) if(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + EinsplineSpinorSetBuilderT.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} + EinsplineSpinorSetBuilderT.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS}) else(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} 
BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp - BsplineFactory/SplineC2R.cpp BsplineFactory/SplineR2R.cpp) - set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2ROMPTarget.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + BsplineFactory/createRealSingle.cpp + BsplineFactory/createRealDouble.cpp) + set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS}) endif(QMC_COMPLEX) endif(HAVE_EINSPLINE) # plane wave SPO - set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWBasis.cpp PlaneWave/PWParameterSet.cpp PlaneWave/PWOrbitalSetBuilder.cpp) + set(FERMION_SRCS ${FERMION_SRCS} + PlaneWave/PWBasis.cpp + PlaneWave/PWBasisT.cpp + PlaneWave/PWOrbitalSetT.cpp + PlaneWave/PWRealOrbitalSetT.cpp + PlaneWave/PWParameterSet.cpp + PlaneWave/PWOrbitalSetBuilder.cpp + ) if(QMC_COMPLEX) set(FERMION_SRCS ${FERMION_SRCS} PlaneWave/PWOrbitalSet.cpp) else() @@ -127,7 +147,7 @@ set(FERMION_SRCS Fermion/BackflowTransformation.cpp Fermion/DiracDeterminantWithBackflow.cpp Fermion/SlaterDetWithBackflow.cpp - SPOSetBuilderFactory.cpp + SPOSetBuilderFactoryT.cpp TrialWaveFunction.cpp TWFdispatcher.cpp TWFFastDerivWrapper.cpp diff --git a/src/QMCWaveFunctions/CompositeSPOSet.cpp b/src/QMCWaveFunctions/CompositeSPOSet.cpp deleted file mode 100644 index 7110a831b1..0000000000 --- a/src/QMCWaveFunctions/CompositeSPOSet.cpp +++ /dev/null @@ -1,197 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "CompositeSPOSet.h" -#include "Utilities/IteratorUtility.h" -#include -#include "OhmmsData/AttributeSet.h" -#include "QMCWaveFunctions/SPOSetBuilderFactory.h" - -namespace qmcplusplus -{ -namespace MatrixOperators -{ -/** copy a small matrix (N, M1) to a big matrix (N, M2), M2>M1 - * @param small input matrix - * @param big outout matrix - * @param offset_c column offset - * - * @todo smater and more efficient matrix, move up for others - * The columns [0,M1) are inserted into [offset_c,offset_c+M1). - */ -template -inline void insert_columns(const MAT1& small, MAT2& big, int offset_c) -{ - const int c = small.cols(); - for (int i = 0; i < small.rows(); ++i) - std::copy(small[i], small[i] + c, big[i] + offset_c); -} -} // namespace MatrixOperators - -CompositeSPOSet::CompositeSPOSet(const std::string& my_name) : SPOSet(my_name) -{ - OrbitalSetSize = 0; - component_offsets.reserve(4); -} - -CompositeSPOSet::CompositeSPOSet(const CompositeSPOSet& other) : SPOSet(other) -{ - for (auto& element : other.components) - { - this->add(element->makeClone()); - } -} - -CompositeSPOSet::~CompositeSPOSet() = default; - -void CompositeSPOSet::add(std::unique_ptr component) -{ - if (components.empty()) - component_offsets.push_back(0); //add 0 - - int norbs = component->size(); - components.push_back(std::move(component)); - component_values.emplace_back(norbs); - component_gradients.emplace_back(norbs); - component_laplacians.emplace_back(norbs); - - OrbitalSetSize += norbs; - component_offsets.push_back(OrbitalSetSize); -} - -void CompositeSPOSet::report() -{ - app_log() << "CompositeSPOSet" << std::endl; - app_log() << " ncomponents = " << components.size() << std::endl; - app_log() << " components" << std::endl; - 
for (int i = 0; i < components.size(); ++i) - { - app_log() << " " << i << std::endl; - components[i]->basic_report(" "); - } -} - -std::unique_ptr CompositeSPOSet::makeClone() const { return std::make_unique(*this); } - -void CompositeSPOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - int n = 0; - for (int c = 0; c < components.size(); ++c) - { - SPOSet& component = *components[c]; - ValueVector& values = component_values[c]; - component.evaluateValue(P, iat, values); - std::copy(values.begin(), values.end(), psi.begin() + n); - n += component.size(); - } -} - -void CompositeSPOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - int n = 0; - for (int c = 0; c < components.size(); ++c) - { - SPOSet& component = *components[c]; - ValueVector& values = component_values[c]; - GradVector& gradients = component_gradients[c]; - ValueVector& laplacians = component_laplacians[c]; - component.evaluateVGL(P, iat, values, gradients, laplacians); - std::copy(values.begin(), values.end(), psi.begin() + n); - std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n); - std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n); - n += component.size(); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) - { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - ValueMatrix l(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, l); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(l, d2logdet, n); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - 
GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - const int nat = last - first; - for (int c = 0; c < components.size(); ++c) - { - int norb = components[c]->size(); - ValueMatrix v(nat, norb); - GradMatrix g(nat, norb); - HessMatrix h(nat, norb); - components[c]->evaluate_notranspose(P, first, last, v, g, h); - int n = component_offsets[c]; - MatrixOperators::insert_columns(v, logdet, n); - MatrixOperators::insert_columns(g, dlogdet, n); - MatrixOperators::insert_columns(h, grad_grad_logdet, n); - } -} - -void CompositeSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); -} - - -std::unique_ptr CompositeSPOSetBuilder::createSPOSetFromXML(xmlNodePtr cur) -{ - std::vector spolist; - putContent(spolist, cur); - if (spolist.empty()) - { - return nullptr; - } - - auto spo_now = std::make_unique(getXMLAttributeValue(cur, "name")); - for (int i = 0; i < spolist.size(); ++i) - { - const SPOSet* spo = sposet_builder_factory_.getSPOSet(spolist[i]); - if (spo) - spo_now->add(spo->makeClone()); - } - return (spo_now->size()) ? 
std::unique_ptr{std::move(spo_now)} : nullptr; -} - -std::unique_ptr CompositeSPOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) -{ - return createSPOSetFromXML(cur); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/CompositeSPOSet.h b/src/QMCWaveFunctions/CompositeSPOSet.h index 1c03eb356f..1663c2f0d8 100644 --- a/src/QMCWaveFunctions/CompositeSPOSet.h +++ b/src/QMCWaveFunctions/CompositeSPOSet.h @@ -15,91 +15,13 @@ #ifndef QMCPLUSPLUS_COMPOSITE_SPOSET_H #define QMCPLUSPLUS_COMPOSITE_SPOSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/BasisSetBase.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "Configuration.h" +#include "QMCWaveFunctions/CompositeSPOSetT.h" namespace qmcplusplus { -class CompositeSPOSet : public SPOSet -{ -public: - ///component SPOSets - std::vector> components; - ///temporary storage for values - std::vector component_values; - ///temporary storage for gradients - std::vector component_gradients; - ///temporary storage for laplacians - std::vector component_laplacians; - ///store the precomputed offsets - std::vector component_offsets; - - CompositeSPOSet(const std::string& my_name); - CompositeSPOSet(const CompositeSPOSet& other); - ~CompositeSPOSet() override; - - std::string getClassName() const override { return "CompositeSPOSet"; } - - ///add a sposet component to this composite sposet - void add(std::unique_ptr component); - - ///print out component info - void report(); - - //SPOSet interface methods - ///size is determined by component sposets and nothing else - inline void setOrbitalSetSize(int norbs) override {} - - std::unique_ptr makeClone() const override; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - ///unimplemented functions call this to abort - inline 
void not_implemented(const std::string& method) - { - APP_ABORT("CompositeSPOSet::" + method + " has not been implemented"); - } - - //methods to be implemented in the future (possibly) - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; -}; - -struct CompositeSPOSetBuilder : public SPOSetBuilder -{ - CompositeSPOSetBuilder(Communicate* comm, const SPOSetBuilderFactory& factory) - : SPOSetBuilder("Composite", comm), sposet_builder_factory_(factory) - {} - - //SPOSetBuilder interface - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - - /// reference to the sposet_builder_factory - const SPOSetBuilderFactory& sposet_builder_factory_; -}; +using CompositeSPOSet = CompositeSPOSetT; +using CompositeSPOSetBuilder = CompositeSPOSetBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp new file mode 100644 index 0000000000..85a0d06848 --- /dev/null +++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp @@ -0,0 +1,236 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "QMCWaveFunctions/CompositeSPOSetT.h" + +#include "OhmmsData/AttributeSet.h" +#include "Utilities/IteratorUtility.h" + +#include + +namespace qmcplusplus +{ +namespace MatrixOperators +{ +/** copy a small matrix (N, M1) to a big matrix (N, M2), M2>M1 + * @param small input matrix + * @param big outout matrix + * @param offset_c column offset + * + * @todo smater and more efficient matrix, move up for others + * The columns [0,M1) are inserted into [offset_c,offset_c+M1). + */ +template +inline void insert_columns(const MAT1& small, MAT2& big, int offset_c) +{ + const int c = small.cols(); + for (int i = 0; i < small.rows(); ++i) + std::copy(small[i], small[i] + c, big[i] + offset_c); +} +} // namespace MatrixOperators + +template +CompositeSPOSetT::CompositeSPOSetT(const std::string& my_name) : SPOSetT(my_name) +{ + this->OrbitalSetSize = 0; + component_offsets.reserve(4); +} + +template +CompositeSPOSetT::CompositeSPOSetT(const CompositeSPOSetT& other) : SPOSetT(other) +{ + for (auto& element : other.components) + { + this->add(element->makeClone()); + } +} + +template +CompositeSPOSetT::~CompositeSPOSetT() = default; + +template +void CompositeSPOSetT::add(std::unique_ptr> component) +{ + if (components.empty()) + component_offsets.push_back(0); // add 0 + + int norbs = component->size(); + components.push_back(std::move(component)); + component_values.emplace_back(norbs); + component_gradients.emplace_back(norbs); + component_laplacians.emplace_back(norbs); + + this->OrbitalSetSize += norbs; + component_offsets.push_back(this->OrbitalSetSize); +} + 
+template +void CompositeSPOSetT::report() +{ + app_log() << "CompositeSPOSetT" << std::endl; + app_log() << " ncomponents = " << components.size() << std::endl; + app_log() << " components" << std::endl; + for (int i = 0; i < components.size(); ++i) + { + app_log() << " " << i << std::endl; + components[i]->basic_report(" "); + } +} + +template +std::unique_ptr> CompositeSPOSetT::makeClone() const +{ + return std::make_unique>(*this); +} + +template +void CompositeSPOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +{ + int n = 0; + for (int c = 0; c < components.size(); ++c) + { + SPOSetT& component = *components[c]; + ValueVector& values = component_values[c]; + component.evaluateValue(P, iat, values); + std::copy(values.begin(), values.end(), psi.begin() + n); + n += component.size(); + } +} + +template +void CompositeSPOSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + int n = 0; + for (int c = 0; c < components.size(); ++c) + { + SPOSetT& component = *components[c]; + ValueVector& values = component_values[c]; + GradVector& gradients = component_gradients[c]; + ValueVector& laplacians = component_laplacians[c]; + component.evaluateVGL(P, iat, values, gradients, laplacians); + std::copy(values.begin(), values.end(), psi.begin() + n); + std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n); + std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n); + n += component.size(); + } +} + +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + const int nat = last - first; + for (int c = 0; c < components.size(); ++c) + { + int norb = components[c]->size(); + ValueMatrix v(nat, norb); + GradMatrix g(nat, norb); + ValueMatrix l(nat, norb); + components[c]->evaluate_notranspose(P, first, last, v, g, l); + int n = component_offsets[c]; + 
MatrixOperators::insert_columns(v, logdet, n); + MatrixOperators::insert_columns(g, dlogdet, n); + MatrixOperators::insert_columns(l, d2logdet, n); + } +} + +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) +{ + const int nat = last - first; + for (int c = 0; c < components.size(); ++c) + { + int norb = components[c]->size(); + ValueMatrix v(nat, norb); + GradMatrix g(nat, norb); + HessMatrix h(nat, norb); + components[c]->evaluate_notranspose(P, first, last, v, g, h); + int n = component_offsets[c]; + MatrixOperators::insert_columns(v, logdet, n); + MatrixOperators::insert_columns(g, dlogdet, n); + MatrixOperators::insert_columns(h, grad_grad_logdet, n); + } +} + +template +void CompositeSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) +{ + not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); +} + +template +std::unique_ptr> CompositeSPOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +{ + std::vector spolist; + putContent(spolist, cur); + if (spolist.empty()) + { + return nullptr; + } + + auto spo_now = std::make_unique>(getXMLAttributeValue(cur, "name")); + for (int i = 0; i < spolist.size(); ++i) + { + const SPOSetT* spo = sposet_builder_factory_.getSPOSet(spolist[i]); + if (spo) + spo_now->add(spo->makeClone()); + } + return (spo_now->size()) ? 
std::unique_ptr>{std::move(spo_now)} : nullptr; +} + +template +std::unique_ptr> CompositeSPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) +{ + return createSPOSetFromXML(cur); +} + +// Class concrete types from ValueType + +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class CompositeSPOSetT; +template class CompositeSPOSetBuilderT; +#else +template class CompositeSPOSetT; +template class CompositeSPOSetBuilderT; +#endif +#else +#ifndef MIXED_PRECISION +template class CompositeSPOSetT>; +template class CompositeSPOSetBuilderT>; +#else +template class CompositeSPOSetT>; +template class CompositeSPOSetBuilderT>; +#endif +#endif + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h new file mode 100644 index 0000000000..161bbf5435 --- /dev/null +++ b/src/QMCWaveFunctions/CompositeSPOSetT.h @@ -0,0 +1,119 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_COMPOSITE_SPOSETT_H +#define QMCPLUSPLUS_COMPOSITE_SPOSETT_H + +#include "QMCWaveFunctions/BasisSetBase.h" +#include "QMCWaveFunctions/SPOSetBuilderFactoryT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +class CompositeSPOSetT : public SPOSetT +{ +public: + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + + /// component SPOSets + std::vector>> components; + /// temporary storage for values + std::vector component_values; + /// temporary storage for gradients + std::vector component_gradients; + /// temporary storage for laplacians + std::vector component_laplacians; + /// store the precomputed offsets + std::vector component_offsets; + + CompositeSPOSetT(const std::string& my_name); + /** + * @TODO: do we want template copy constructor + * (i.e., copy from other with different type argument)? 
+ */ + CompositeSPOSetT(const CompositeSPOSetT& other); + ~CompositeSPOSetT() override; + + std::string getClassName() const override { return "CompositeSPOSetT"; } + + /// add a sposet component to this composite sposet + void add(std::unique_ptr> component); + + /// print out component info + void report(); + + // SPOSet interface methods + /// size is determined by component sposets and nothing else + inline void setOrbitalSetSize(int norbs) override {} + + std::unique_ptr> makeClone() const override; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + /// unimplemented functions call this to abort + inline void not_implemented(const std::string& method) + { + APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented"); + } + + // methods to be implemented in the future (possibly) + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet, + GGGMatrix& dddlogdet) override; +}; + +template +class CompositeSPOSetBuilderT : public SPOSetBuilderT +{ +public: + CompositeSPOSetBuilderT(Communicate* comm, const SPOSetBuilderFactoryT& factory) + : SPOSetBuilderT("Composite", comm), sposet_builder_factory_(factory) + {} + + // SPOSetBuilder interface + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + + std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; + + /// reference to the sposet_builder_factory + const SPOSetBuilderFactoryT& sposet_builder_factory_; +}; + +} // 
namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp new file mode 100644 index 0000000000..9c141ddd71 --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp @@ -0,0 +1,1816 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" + +#include "CPU/SIMD/vmath.hpp" +#include "CPU/e2iphi.h" +#include "CPU/math.hpp" +#include "Message/CommOperators.h" +#include "Message/Communicate.h" +#include "OhmmsData/AttributeSet.h" +#include "Particle/DistanceTableT.h" +#include "ParticleBase/RandomSeqGenerator.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineSetT.h" +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" +#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "Utilities/ProgressReportEngine.h" +#include "Utilities/Timer.h" +#include "Utilities/qmc_common.h" +#include +#include +#include + +#include +#include +#include + +namespace qmcplusplus +{ +// std::map EinsplineSetBuilder::SPOSetMap; +// std::map,EinsplineSetBuilder::OrbType*,Int4less> +// EinsplineSetBuilder::OrbitalMap; +////std::map +/// EinsplineSetBuilder::ExtendedMap_z; +////std::map +/// EinsplineSetBuilder::ExtendedMap_d; + +template +EinsplineSetBuilderT::EinsplineSetBuilderT(ParticleSetT& p, + const PSetMap& psets, + Communicate* comm, + xmlNodePtr cur) + : SPOSetBuilderT("spline", comm), + ParticleSets(psets), + TargetPtcl(p), + XMLRoot(cur), + Format(QMCPACK), + NumBands(0), + NumElectrons(0), + NumSpins(0), + NumTwists(0), + MeshFactor(1.0), + MeshSize(0, 0, 0), + twist_num_(-1), + LastSpinSet(-1), + NumOrbitalsRead(-1), + makeRotations(false) +{ + this->ClassName = "EinsplineSetBuilder"; + + MatchingTol = 10 * std::numeric_limits::epsilon(); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + TileMatrix(i, j) = 0; + + // invalidate states by the basis class + 
this->states.clear(); + this->states.resize(p.groups()); + + // create vectors with nullptr + FullBands.resize(p.groups()); +} + +template +inline TinyVector IntPart(const TinyVector& twist) +{ + return TinyVector(round(twist[0] - 1.0e-6), round(twist[1] - 1.0e-6), round(twist[2] - 1.0e-6)); +} + +template +inline TinyVector FracPart(const TinyVector& twist) +{ + return twist - IntPart(twist); +} + +template +EinsplineSetBuilderT::~EinsplineSetBuilderT() +{ + DEBUG_MEMORY("EinsplineSetBuilder::~EinsplineSetBuilder"); +} + +template +bool EinsplineSetBuilderT::CheckLattice() +{ + double diff = 0.0; + for (int i = 0; i < OHMMS_DIM; i++) + for (int j = 0; j < OHMMS_DIM; j++) + { + double max_abs = + std::max(std::abs(SuperLattice(i, j)), static_cast(std::abs(TargetPtcl.getLattice().R(i, j)))); + if (max_abs > MatchingTol) + diff = std::max(diff, std::abs(SuperLattice(i, j) - TargetPtcl.getLattice().R(i, j)) / max_abs); + } + + if (diff > MatchingTol) + { + std::ostringstream o; + o.setf(std::ios::scientific, std::ios::floatfield); + o.precision(6); + o << "EinsplineSetBuilder::ReadOrbitalInfo_ESHDF \n" + << "Mismatched supercell lattices.\n"; + o << " Lattice in ESHDF5 " << std::endl; + o << SuperLattice << std::endl; + o << " Lattice in xml" << std::endl; + o << TargetPtcl.getLattice().R << std::endl; + o << " Difference " << std::endl; + o << SuperLattice - TargetPtcl.getLattice().R << std::endl; + o << " Max relative error = " << diff << std::endl; + o << " Tolerance = " << MatchingTol << std::endl; + app_error() << o.str(); + return false; + } + return true; +} + +template +void EinsplineSetBuilderT::BroadcastOrbitalInfo() +{ + if (this->myComm->size() == 1) + return; + int numIons = IonTypes.size(); + int numDensityGvecs = TargetPtcl.DensityReducedGvecs.size(); + PooledData abuffer; + PooledData aibuffer; + aibuffer.add(Version.begin(), Version.end()); // myComm->bcast(Version); + aibuffer.add(Format); + abuffer.add(Lattice.begin(), Lattice.end()); // 
myComm->bcast(Lattice); + abuffer.add(RecipLattice.begin(), + RecipLattice.end()); // myComm->bcast(RecipLattice); + abuffer.add(SuperLattice.begin(), + SuperLattice.end()); // myComm->bcast(SuperLattice); + abuffer.add(LatticeInv.begin(), LatticeInv.end()); // myComm->bcast(LatticeInv); + aibuffer.add(NumBands); // myComm->bcast(NumBands); + aibuffer.add(NumElectrons); // myComm->bcast(NumElectrons); + aibuffer.add(NumSpins); // myComm->bcast(NumSpins); + aibuffer.add(NumTwists); // myComm->bcast(NumTwists); + aibuffer.add(numIons); // myComm->bcast(numIons); + aibuffer.add(numDensityGvecs); + aibuffer.add(HaveOrbDerivs); + this->myComm->bcast(abuffer); + this->myComm->bcast(aibuffer); + if (this->myComm->rank()) + { + abuffer.rewind(); + aibuffer.rewind(); + aibuffer.get(Version.begin(), Version.end()); + aibuffer.get(Format); + abuffer.get(Lattice.begin(), Lattice.end()); + abuffer.get(RecipLattice.begin(), RecipLattice.end()); + abuffer.get(SuperLattice.begin(), SuperLattice.end()); + abuffer.get(LatticeInv.begin(), LatticeInv.end()); + aibuffer.get(NumBands); + aibuffer.get(NumElectrons); + aibuffer.get(NumSpins); + aibuffer.get(NumTwists); + aibuffer.get(numIons); + aibuffer.get(numDensityGvecs); + aibuffer.get(HaveOrbDerivs); + TargetPtcl.DensityReducedGvecs.resize(numDensityGvecs); + TargetPtcl.Density_G.resize(numDensityGvecs); + } + if (IonTypes.size() != numIons) + { + IonTypes.resize(numIons); + IonPos.resize(numIons); + } + // new buffer + PooledData bbuffer; + PooledData bibuffer; + for (int i = 0; i < numIons; ++i) + bibuffer.add(IonTypes[i]); + // myComm->bcast(IonTypes); + bbuffer.add(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons); + // myComm->bcast(IonPos); + if (primcell_kpoints.size() != NumTwists) + primcell_kpoints.resize(NumTwists); + bbuffer.add(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists); + bibuffer.add(&(TargetPtcl.DensityReducedGvecs[0][0]), + &(TargetPtcl.DensityReducedGvecs[0][0]) + numDensityGvecs * 
OHMMS_DIM); + bbuffer.add(&(TargetPtcl.Density_G[0]), &(TargetPtcl.Density_G[0]) + numDensityGvecs); + this->myComm->bcast(bbuffer); + this->myComm->bcast(bibuffer); + if (this->myComm->rank()) + { + bbuffer.rewind(); + bibuffer.rewind(); + for (int i = 0; i < numIons; ++i) + bibuffer.get(IonTypes[i]); + bbuffer.get(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons); + bbuffer.get(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists); + bibuffer.get(&(TargetPtcl.DensityReducedGvecs[0][0]), + &(TargetPtcl.DensityReducedGvecs[0][0]) + numDensityGvecs * OHMMS_DIM); + bbuffer.get(&(TargetPtcl.Density_G[0]), &(TargetPtcl.Density_G[0]) + numDensityGvecs); + } + // buffer to bcast hybrid representation atomic orbital info + PooledData cbuffer; + PooledData cibuffer; + this->myComm->bcast(cbuffer); + this->myComm->bcast(cibuffer); + AtomicCentersInfo.resize(numIons); + Super2Prim.resize(SourcePtcl->R.size()); + cbuffer.add(AtomicCentersInfo.inner_cutoff.begin(), AtomicCentersInfo.inner_cutoff.end()); + cbuffer.add(AtomicCentersInfo.non_overlapping_radius.begin(), AtomicCentersInfo.non_overlapping_radius.end()); + cbuffer.add(AtomicCentersInfo.cutoff.begin(), AtomicCentersInfo.cutoff.end()); + cbuffer.add(AtomicCentersInfo.spline_radius.begin(), AtomicCentersInfo.spline_radius.end()); + cibuffer.add(Super2Prim.begin(), Super2Prim.end()); + cibuffer.add(AtomicCentersInfo.lmax.begin(), AtomicCentersInfo.lmax.end()); + cibuffer.add(AtomicCentersInfo.GroupID.begin(), AtomicCentersInfo.GroupID.end()); + cibuffer.add(AtomicCentersInfo.spline_npoints.begin(), AtomicCentersInfo.spline_npoints.end()); + this->myComm->bcast(cbuffer); + this->myComm->bcast(cibuffer); + if (this->myComm->rank()) + { + cbuffer.rewind(); + cibuffer.rewind(); + cbuffer.get(AtomicCentersInfo.inner_cutoff.begin(), AtomicCentersInfo.inner_cutoff.end()); + cbuffer.get(AtomicCentersInfo.non_overlapping_radius.begin(), AtomicCentersInfo.non_overlapping_radius.end()); + 
cbuffer.get(AtomicCentersInfo.cutoff.begin(), AtomicCentersInfo.cutoff.end()); + cbuffer.get(AtomicCentersInfo.spline_radius.begin(), AtomicCentersInfo.spline_radius.end()); + cibuffer.get(Super2Prim.begin(), Super2Prim.end()); + cibuffer.get(AtomicCentersInfo.lmax.begin(), AtomicCentersInfo.lmax.end()); + cibuffer.get(AtomicCentersInfo.GroupID.begin(), AtomicCentersInfo.GroupID.end()); + cibuffer.get(AtomicCentersInfo.spline_npoints.begin(), AtomicCentersInfo.spline_npoints.end()); + for (int i = 0; i < numIons; i++) + AtomicCentersInfo.ion_pos[i] = IonPos[i]; + } +} + +//////////////////////////////////////////////////////////////// +//// Create the ion ParticleSet from the data in the HDF file // +//////////////////////////////////////////////////////////////// +// void +// EinsplineSetBuilder::CreateIonParticleSet( std::string sourceName) +//{ +// // ParticleSet &pTemp = *(new MCWalkerConfiguration); +// ParticleSet &pTemp = *(new ParticleSet); +// pTemp.setName (sourceName); +// SpeciesSet& tspecies(pTemp.getSpeciesSet()); +// ParticleSets[sourceName] = &pTemp; +// } +// + +template +void EinsplineSetBuilderT::TileIons() +{ + // set the primitive lattice + SourcePtcl->getPrimitiveLattice().set(Lattice); + + for (int j = 0; j < IonPos.size(); ++j) + IonPos[j] = FracPart(SourcePtcl->getPrimitiveLattice().toUnit(IonPos[j])); + + IonPos.resize(SourcePtcl->getTotalNum()); + IonTypes.resize(SourcePtcl->getTotalNum()); + std::copy(SourcePtcl->R.begin(), SourcePtcl->R.end(), IonPos.begin()); + std::copy(SourcePtcl->GroupID.begin(), SourcePtcl->GroupID.end(), IonTypes.begin()); + + // app_log() << " Primitive Cell\n"; + // SourcePtcl->getPrimitiveLattice().print(app_log()); + // app_log() << " Super Cell\n"; + // SourcePtcl->Lattice.print(app_log()); + + // Don't need to do this, already one by ParticleSetPool.cpp + // Vector > primPos = IonPos; + // Vector primTypes = IonTypes; + // int numCopies = std::abs(det(TileMatrix)); + // 
IonTypes.resize(primPos.size()*numCopies); + // IonPos.resize (primPos.size()*numCopies); + // int maxCopies = 10; + // using Vec3 = TinyVector; + // int index=0; + // for (int i0=-maxCopies; i0<=maxCopies; i0++) + // for (int i1=-maxCopies; i1<=maxCopies; i1++) + // for (int i2=-maxCopies; i2<=maxCopies; i2++) + // for (int iat=0; iat < primPos.size(); iat++) + // { + // Vec3 r = primPos[iat]; + // Vec3 uPrim = PrimCell.toUnit(r); + // for (int i=0; i<3; i++) + // uPrim[i] -= std::floor(uPrim[i]); + // r = PrimCell.toCart(uPrim) + (double)i0*PrimCell.a(0) + + // (double)i1*PrimCell.a(1) + (double)i2*PrimCell.a(2); + // Vec3 uSuper = SuperCell.toUnit(r); + // if ((uSuper[0] >= -1.0e-4) && (uSuper[0] < 0.9999) && + // (uSuper[1] >= -1.0e-4) && (uSuper[1] < 0.9999) && + // (uSuper[2] >= -1.0e-4) && (uSuper[2] < 0.9999)) + // { + // IonPos[index]= r; + // IonTypes[index]= primTypes[iat]; + // index++; + // } + // } + // if (index != primPos.size()*numCopies) + // { + // app_error() << "The number of tiled ions, " << IonPos.size() + // << ", does not match the expected number of " + // << primPos.size()*numCopies << " or the index "<< index + // <<". 
Aborting.\n"; + // APP_ABORT("EinsplineSetBuilder::TileIons()"); + // } + // if (myComm->rank() == 0) + // { + // char buf[1000]; + // snprintf (buf, 1000, "Supercell reduced ion positions = \n"); + // app_log() << buf; + // app_log().flush(); + // for (int i=0; i +bool EinsplineSetBuilderT::TwistPair(PosType a, PosType b) const +{ + bool pair = true; + for (int n = 0; n < OHMMS_DIM; n++) + { + double d = a[n] + b[n]; + if (std::abs(d - round(d)) > MatchingTol) + pair = false; + } + return pair; +} + +template +void EinsplineSetBuilderT::AnalyzeTwists2(const int twist_num_inp, const TinyVector& twist_inp) +{ + Tensor S; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + S(i, j) = (double)TileMatrix(i, j); + + const int num_prim_kpoints = primcell_kpoints.size(); + + // build a list of unique super twists that all the primitive cell k-point + // correspond to. + std::vector superFracs; // twist super twist coordinates + std::vector superIndex; // the indices of the super twists that correpsond to all + // the primitive cell k-points in the unique list. + { + // scan all the primitive cell k-points + for (int ki = 0; ki < num_prim_kpoints; ki++) + { + PosType primTwist = primcell_kpoints[ki]; + PosType superTwist = dot(S, primTwist); + PosType kp = PrimCell.k_cart(primTwist); + PosType ks = SuperCell.k_cart(superTwist); + // check the consistency of tiling, primitive and super cells. + if (dot(ks - kp, ks - kp) > 1.0e-6) + { + app_error() << "Primitive and super k-points do not agree. " + "Error in coding.\n"; + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + PosType frac = FracPart(superTwist); + // verify if the super twist that correpsonds to this primitive cell + // k-point exists in the unique list or not. 
+ bool found = false; + for (int j = 0; j < superFracs.size(); j++) + { + PosType diff = frac - superFracs[j]; + if (dot(diff, diff) < 1.0e-6) + { + found = true; + superIndex.push_back(j); + } + } + if (!found) + { + superIndex.push_back(superFracs.size()); + superFracs.push_back(frac); + } + } + assert(superIndex.size() == num_prim_kpoints); + } + + const int numSuperTwists = superFracs.size(); + { + app_log() << "Found " << numSuperTwists << " distinct supercell twist" << (numSuperTwists > 1 ? "s" : "") + << " based on " << num_prim_kpoints << " primitive cell k-point" << (num_prim_kpoints > 1 ? "s" : "") + << std::endl; + if (this->myComm->rank() == 0) + { + int n_tot_irred(0); + for (int si = 0; si < numSuperTwists; si++) + { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), "Super twist #%d: [ %9.5f %9.5f %9.5f ]\n", si, + superFracs[si][0], superFracs[si][1], superFracs[si][2]); + if (length < 0) + throw std::runtime_error("Error converting Super twist to a string"); + app_log() << std::string_view(buf.data(), length); + app_log().flush(); + } + } + } + + // For each supercell twist, create a list of primitive twists which + // correspond to it. + std::vector> superSets; + { + superSets.resize(numSuperTwists); + for (int ki = 0; ki < num_prim_kpoints; ki++) + superSets[superIndex[ki]].push_back(ki); + } + + { // look up a super cell twist and return its index in the unique list of + // super cell twists. + std::function find_twist = [&](const TinyVector& twist) { + int twist_num = -1; + PosType gtFrac = FracPart(twist); + float eps = 1e-5; + for (int si = 0; si < numSuperTwists; si++) + { + PosType locDiff = gtFrac - superFracs[si]; + if (dot(locDiff, locDiff) < eps) + twist_num = si; + } + + if (twist_num < 0) + { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), + "AnalyzeTwists2. 
Input twist [ %9.5f %9.5f %9.5f] not " + "found in the list of super twists above.\n", + twist[0], twist[1], twist[2]); + if (length < 0) + throw std::runtime_error("Error generating error message"); + throw UniformCommunicateError(buf.data()); + } + return twist_num; + }; + + if (twist_inp[0] > TWIST_NO_INPUT || twist_inp[1] > TWIST_NO_INPUT || twist_inp[2] > TWIST_NO_INPUT) + { + if (twist_num_inp != TWISTNUM_NO_INPUT) + app_warning() << "twist attribute exists. twistnum attribute ignored. " + "To prevent this message, remove twistnum from input." + << std::endl; + + twist_num_ = find_twist(twist_inp); + } + else if (twist_num_inp != TWISTNUM_NO_INPUT) + { + app_warning() << "twist attribute does't exist but twistnum " + "attribute was found. " + << "This is potentially ambiguous. Specifying twist " + "attribute is preferred." + << std::endl; + if (twist_num_inp < 0 || twist_num_inp >= numSuperTwists) + { + std::ostringstream msg; + msg << "AnalyzeTwists2. twistnum input value " << twist_num_inp << " is outside the acceptable range [0, " + << numSuperTwists << ")." << std::endl; + throw UniformCommunicateError(msg.str()); + } + twist_num_ = twist_num_inp; + } + else + { + app_log() << "twist attribte does't exist. Set Gamma point." 
<< std::endl; + twist_num_ = find_twist({0, 0, 0}); + } + + assert(twist_num_ >= 0 && twist_num_ < numSuperTwists); + + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), " Using supercell twist %d: [ %9.5f %9.5f %9.5f]", twist_num_, + superFracs[twist_num_][0], superFracs[twist_num_][1], superFracs[twist_num_][2]); + if (length < 0) + throw std::runtime_error("Error converting supercell twist to a string"); + app_log() << std::string_view(buf.data(), length) << std::endl; + } + + TargetPtcl.setTwist(superFracs[twist_num_]); + if constexpr (!IsComplex_t{}()) + { + // Check to see if supercell twist is okay to use with real wave + // functions + for (int dim = 0; dim < OHMMS_DIM; dim++) + { + double t = 2.0 * superFracs[twist_num_][dim]; + if (std::abs(t - round(t)) > MatchingTol * 100) + { + app_error() << "Cannot use this super twist with real wavefunctions.\n" + << "Please recompile with QMC_COMPLEX=1.\n"; + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + } + } + // Now check to see that each supercell twist has the right twists + // to tile the primitive cell orbitals. 
+ const int numTwistsNeeded = std::abs(det(TileMatrix)); + for (int si = 0; si < numSuperTwists; si++) + { + // First make sure we have enough points + if (superSets[si].size() != numTwistsNeeded) + { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), "Super twist %d should own %d k-points, but owns %d.\n", si, + numTwistsNeeded, static_cast(superSets[si].size())); + if (length < 0) + throw std::runtime_error("Error generating Super twist string"); + app_error() << std::string_view(buf.data(), length); + if (si == twist_num_) + { + APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2"); + } + else + continue; + } + // Now, make sure they are all distinct + int N = superSets[si].size(); + for (int i = 0; i < N; i++) + { + PosType twistPrim_i = primcell_kpoints[superSets[si][i]]; + PosType twistSuper_i = dot(S, twistPrim_i); + PosType superInt_i = IntPart(twistSuper_i); + for (int j = i + 1; j < N; j++) + { + PosType twistPrim_j = primcell_kpoints[superSets[si][j]]; + PosType twistSuper_j = dot(S, twistPrim_j); + PosType superInt_j = IntPart(twistSuper_j); + if (dot(superInt_i - superInt_j, superInt_i - superInt_j) < 1.0e-6) + { + app_error() << "Identical k-points detected in super twist set " << si << std::endl; + APP_ABORT_TRACE(__FILE__, __LINE__, "AnalyzeTwists2"); + } + } + } + } + app_log().flush(); + // Finally, record which k-points to include on this group of + // processors, which have been assigned supercell twist twist_num_ + IncludeTwists.clear(); + for (int i = 0; i < superSets[twist_num_].size(); i++) + IncludeTwists.push_back(superSets[twist_num_][i]); + // Now, find out which twists are distinct + DistinctTwists.clear(); + if constexpr (!IsComplex_t{}()) + { + std::vector copyTwists; + for (int i = 0; i < IncludeTwists.size(); i++) + { + int ti = IncludeTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + bool distinct = true; + for (int j = i + 1; j < IncludeTwists.size(); j++) + { + int tj = IncludeTwists[j]; + PosType twist_j = 
primcell_kpoints[tj]; + PosType sum = twist_i + twist_j; + PosType diff = twist_i - twist_j; + if (TwistPair(twist_i, twist_j)) + distinct = false; + } + if (distinct) + DistinctTwists.push_back(ti); + else + copyTwists.push_back(ti); + } + // Now determine which distinct twists require two copies + MakeTwoCopies.resize(DistinctTwists.size()); + for (int i = 0; i < DistinctTwists.size(); i++) + { + MakeTwoCopies[i] = false; + int ti = DistinctTwists[i]; + PosType twist_i = primcell_kpoints[ti]; + for (int j = 0; j < copyTwists.size(); j++) + { + int tj = copyTwists[j]; + PosType twist_j = primcell_kpoints[tj]; + if (TwistPair(twist_i, twist_j)) + MakeTwoCopies[i] = true; + } + if (this->myComm->rank() == 0) + { + std::array buf; + int length = std::snprintf(buf.data(), buf.size(), "Using %d copies of twist angle [%6.3f, %6.3f, %6.3f]\n", + MakeTwoCopies[i] ? 2 : 1, twist_i[0], twist_i[1], twist_i[2]); + if (length < 0) + throw std::runtime_error("Error generating string"); + app_log() << std::string_view(buf.data(), length); + app_log().flush(); + } + } + // Find out if we can make real orbitals + use_real_splines_ = true; + for (int i = 0; i < DistinctTwists.size(); i++) + { + int ti = DistinctTwists[i]; + PosType twist = primcell_kpoints[ti]; + for (int j = 0; j < OHMMS_DIM; j++) + if (std::abs(twist[j] - 0.0) > MatchingTol && std::abs(twist[j] - 0.5) > MatchingTol && + std::abs(twist[j] + 0.5) > MatchingTol) + use_real_splines_ = false; + } + if (use_real_splines_ && (DistinctTwists.size() > 1)) + { + app_log() << "***** Use of real orbitals is possible, but not " + "currently implemented\n" + << " with more than one twist angle.\n"; + use_real_splines_ = false; + } + if (use_real_splines_) + app_log() << "Using real splines.\n"; + else + app_log() << "Using complex splines.\n"; + } + else + { + DistinctTwists.resize(IncludeTwists.size()); + MakeTwoCopies.resize(IncludeTwists.size()); + for (int i = 0; i < IncludeTwists.size(); i++) + { + DistinctTwists[i] = 
IncludeTwists[i]; + MakeTwoCopies[i] = false; + } + use_real_splines_ = false; + } +} + +template +void EinsplineSetBuilderT::OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks) +{ + if (this->myComm->rank() != 0) + return; + if (spin >= NumSpins && !skipChecks) + { + app_error() << "To developer: User is requesting for orbitals in an " + "invalid spin group " + << spin << ". Current h5 file only contains spin groups " + << "[0.." << NumSpins - 1 << "]." << std::endl; + app_error() << "To user: Orbital H5 file contains no spin down data " + "and is appropriate only for spin unpolarized " + "calculations. " + << "If this is your intent, please replace 'spindataset=1' " + "with 'spindataset=0' in the input file." + << std::endl; + abort(); + } + if (Format == ESHDF) + { + OccupyBands_ESHDF(spin, sortBands, numOrbs); + return; + } + std::string eigenstatesGroup; + if (Version[0] == 0 && Version[1] == 11) + eigenstatesGroup = "/eigenstates_3"; + else if (Version[0] == 0 && Version[1] == 20) + eigenstatesGroup = "/eigenstates"; + + if (FullBands[spin]->size()) + { + app_log() << " FullBand[" << spin << "] exists. Reuse it. 
" << std::endl; + return; + } + + std::vector& SortBands(*FullBands[spin]); + + SortBands.clear(); + for (int ti = 0; ti < DistinctTwists.size(); ti++) + { + int tindex = DistinctTwists[ti]; + // First, read valence states + for (int bi = 0; bi < NumBands; bi++) + { + BandInfo band; + band.TwistIndex = tindex; + band.BandIndex = bi; + band.MakeTwoCopies = MakeTwoCopies[ti]; + // Read eigenenergy from file + std::ostringstream ePath, sPath; + if ((Version[0] == 0 && Version[1] == 11) || NumTwists > 1) + { + ePath << eigenstatesGroup << "/twist_" << tindex << "/band_" << bi << "/eigenvalue"; + sPath << eigenstatesGroup << "/twist_" << tindex << "/band_" << bi << "/spin"; + } + else if (NumBands > 1) + { + ePath << eigenstatesGroup << "/twist/band_" << bi << "/eigenvalue"; + sPath << eigenstatesGroup << "/twist/band_" << bi << "/spin"; + } + else + { + ePath << eigenstatesGroup << "/twist/band/eigenvalue"; + sPath << eigenstatesGroup << "/twist/band/spin"; + } + band.Energy = -1.01e100; + H5File.read(band.Energy, ePath.str()); + if (band.Energy > -1.0e100) + { + H5File.read(band.Spin, sPath.str()); + if (band.Spin == spin) + SortBands.push_back(band); + } + } + } + int orbIndex = 0; + int numOrbs_counter = 0; + while (numOrbs_counter < numOrbs) + { + if (SortBands[orbIndex].MakeTwoCopies) + numOrbs_counter += 2; + else + numOrbs_counter++; + orbIndex++; + } + NumDistinctOrbitals = orbIndex; + app_log() << "We will read " << NumDistinctOrbitals << " distinct orbitals.\n"; +} + +template +void EinsplineSetBuilderT::bcastSortBands(int spin, int n, bool root) +{ + std::vector& SortBands(*FullBands[spin]); + + TinyVector nbands(int(SortBands.size()), n); + mpi::bcast(*this->myComm, nbands); + + // buffer to serialize BandInfo + PooledData misc(nbands[0] * 4); + n = NumDistinctOrbitals = nbands[1]; + + if (root) + { + misc.rewind(); + for (int i = 0; i < n; ++i) + { + misc.put(SortBands[i].TwistIndex); + misc.put(SortBands[i].BandIndex); + misc.put(SortBands[i].Energy); + 
misc.put(SortBands[i].MakeTwoCopies); + } + + for (int i = n; i < SortBands.size(); ++i) + { + misc.put(SortBands[i].TwistIndex); + misc.put(SortBands[i].BandIndex); + misc.put(SortBands[i].Energy); + misc.put(SortBands[i].MakeTwoCopies); + } + } + this->myComm->bcast(misc); + + if (!root) + { + SortBands.resize(nbands[0]); + misc.rewind(); + for (int i = 0; i < n; ++i) + { + misc.get(SortBands[i].TwistIndex); + misc.get(SortBands[i].BandIndex); + misc.get(SortBands[i].Energy); + misc.get(SortBands[i].MakeTwoCopies); + } + for (int i = n; i < SortBands.size(); ++i) + { + misc.get(SortBands[i].TwistIndex); + misc.get(SortBands[i].BandIndex); + misc.get(SortBands[i].Energy); + misc.get(SortBands[i].MakeTwoCopies); + } + } +} + +/** Ordering predicate for BandInfo entries. + * + * Orders by BandIndex first. For equal BandIndex, energies that differ by less than 1e-6 + * are treated as equal and the TwistIndex decides; otherwise the lower Energy comes first. + * The arguments are taken by const reference so that a comparison does not copy them. + * + * NOTE(review): the energy tolerance makes "equal energy" non-transitive, so this is not + * guaranteed to be a strict weak ordering when handed to std::sort -- confirm acceptable. + * + * @param leftB left-hand band + * @param rightB right-hand band + * @return true when leftB is ordered before rightB + */ +inline bool sortByIndex(const BandInfo& leftB, const BandInfo& rightB) +{ + if (leftB.BandIndex == rightB.BandIndex) + { + if ((leftB.Energy < rightB.Energy + 1e-6) && (leftB.Energy > rightB.Energy - 1e-6)) + return leftB.TwistIndex < rightB.TwistIndex; + else + return leftB.Energy < rightB.Energy; + } + else + return (leftB.BandIndex < rightB.BandIndex); +} + +template +bool EinsplineSetBuilderT::ReadOrbitalInfo_ESHDF(bool skipChecks) +{ + app_log() << " Reading orbital file in ESHDF format.\n"; + H5File.read(Version, "/version"); + app_log() << " ESHDF orbital file version " << Version[0] << "." << Version[1] << "." 
<< Version[2] << std::endl; + H5File.read(Lattice, "/supercell/primitive_vectors"); + RecipLattice = 2.0 * M_PI * inverse(Lattice); + SuperLattice = dot(TileMatrix, Lattice); + std::array buff; + int length = std::snprintf(buff.data(), buff.size(), + " Lattice = \n [ %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f ]\n", + Lattice(0, 0), Lattice(0, 1), Lattice(0, 2), Lattice(1, 0), Lattice(1, 1), Lattice(1, 2), + Lattice(2, 0), Lattice(2, 1), Lattice(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting lattice to a string"); + app_log() << std::string_view(buff.data(), length); + length = + std::snprintf(buff.data(), buff.size(), + " SuperLattice = \n [ %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f\n" + " %9.6f %9.6f %9.6f ]\n", + SuperLattice(0, 0), SuperLattice(0, 1), SuperLattice(0, 2), SuperLattice(1, 0), SuperLattice(1, 1), + SuperLattice(1, 2), SuperLattice(2, 0), SuperLattice(2, 1), SuperLattice(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting SuperLattice to a string"); + app_log() << std::string_view(buff.data(), length) << std::endl; + if (!CheckLattice()) + throw std::runtime_error("CheckLattice failed"); + PrimCell.set(Lattice); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI); + int have_dpsi = false; + NumTwists = NumSpins = NumBands = 0; + NumElectrons = TargetPtcl.getTotalNum(); + H5File.read(NumBands, "/electrons/kpoint_0/spin_0/number_of_states"); + H5File.readEntry(NumSpins, "/electrons/number_of_spins"); + H5File.read(NumTwists, "/electrons/number_of_kpoints"); + H5File.readEntry(have_dpsi, "/electrons/have_dpsi"); + HaveOrbDerivs = have_dpsi; + app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists + << std::endl; + ////////////////////////////////// + // Read ion types and locations // + ////////////////////////////////// + Vector species_ids; + 
H5File.read(species_ids, "/atoms/species_ids"); + int num_species; + H5File.read(num_species, "/atoms/number_of_species"); + std::vector atomic_numbers(num_species); + for (int isp = 0; isp < num_species; isp++) + { + std::ostringstream name; + name << "/atoms/species_" << isp << "/atomic_number"; + H5File.readEntry(atomic_numbers[isp], name.str()); + } + IonTypes.resize(species_ids.size()); + for (int i = 0; i < species_ids.size(); i++) + IonTypes[i] = atomic_numbers[species_ids[i]]; + H5File.read(IonPos, "/atoms/positions"); + for (int i = 0; i < IonTypes.size(); i++) + app_log() << "Atom type(" << i << ") = " << IonTypes[i] << std::endl; + ///////////////////////////////////// + // Read atom orbital info from xml // + ///////////////////////////////////// + // construct Super2Prim mapping: Super2Prim[i] becomes the index j of the primitive-cell + // ion (IonPos[j]) whose reduced coordinates match those of supercell ion i + // (SourcePtcl->R[i]) once whole lattice translations are removed. + if (Super2Prim.size() == 0) + { + // SourcePtcl->convert2Cart(SourcePtcl->R); + Super2Prim.resize(SourcePtcl->R.size(), -1); + std::vector prim_atom_counts; + prim_atom_counts.resize(IonPos.size(), 0); + for (int i = 0; i < SourcePtcl->R.size(); i++) + { + PosType ref = PrimCell.toUnit_floor(SourcePtcl->R[i]); + for (int j = 0; j < IonPos.size(); j++) + { + PosType dr = PrimCell.toUnit_floor(IonPos[j]) - ref; + for (int k = 0; k < OHMMS_DIM; k++) + dr[k] -= round(dr[k]); + if (dot(dr, dr) < MatchingTol) + { + if (Super2Prim[i] < 0) + { + Super2Prim[i] = j; + prim_atom_counts[j]++; + } + else + { + // supercell ion i matched a second primitive-cell ion: print ion i's own + // position; j indexes IonPos, not SourcePtcl->R + app_error() << "Supercell ion " << i << " at " << SourcePtcl->R[i] + << " was found twice in the primitive cell as ion " << Super2Prim[i] << " and " << j + << std::endl; + if (!skipChecks) + abort(); + } + } + } + if (Super2Prim[i] < 0) + { + app_error() << "Supercell ion " << i << " not found in the primitive cell" << std::endl; + if (!skipChecks) + abort(); + } + else + { + // app_log() << "Supercell ion " << i << " mapped to primitive + // cell ion " << Super2Prim[i] << std::endl; + } + } + const int tiling_size = std::abs(det(TileMatrix)); + for (int i = 0; i < IonPos.size(); 
i++) + if (prim_atom_counts[i] != tiling_size) + { + app_error() << "Primitive cell ion " << i << " was found only " << prim_atom_counts[i] + << " times in the supercell rather than " << tiling_size << std::endl; + if (!skipChecks) + abort(); + } + // construct AtomicCentersInfo + AtomicCentersInfo.resize(IonPos.size()); + for (int i = 0; i < IonPos.size(); i++) + AtomicCentersInfo.ion_pos[i] = IonPos[i]; + const auto& source_species = SourcePtcl->getSpeciesSet(); + int Zind = source_species.findAttribute("atomicnumber"); + const int table_id = SourcePtcl->addTable(*SourcePtcl); + const auto& ii_table = SourcePtcl->getDistTable(table_id); + SourcePtcl->update(true); + for (int i = 0; i < IonPos.size(); i++) + { + AtomicCentersInfo.non_overlapping_radius[i] = std::numeric_limits::max(); + // should only call get_first_neighbor to set non_overlapping_radius + // if there are more than one atom in the cell + if (Super2Prim.size() == 1) + continue; + for (int j = 0; j < Super2Prim.size(); j++) + if (Super2Prim[j] == i) + { + // set GroupID for each ion in primitive cell + if ((Zind < 0) || (source_species(Zind, SourcePtcl->GroupID[j]) == IonTypes[i])) + AtomicCentersInfo.GroupID[i] = SourcePtcl->GroupID[j]; + else + { + app_error() << "Primitive cell ion " << i << " vs supercell ion " << j + << " atomic number not matching: " << IonTypes[i] << " vs " + << source_species(Zind, SourcePtcl->GroupID[j]) << std::endl; + if (!skipChecks) + abort(); + } + // set non_overlapping_radius for each ion in primitive cell + RealType r(0); + PosType dr; + ii_table.get_first_neighbor(j, r, dr, false); + if (r < 1e-3) + APP_ABORT("EinsplineSetBuilder::ReadOrbitalInfo_ESHDF " + "too close ions <1e-3 bohr!"); + AtomicCentersInfo.non_overlapping_radius[i] = 0.5 * r; + break; + } + } + + // load cutoff_radius, spline_radius, spline_npoints, lmax if exists. 
+ const int inner_cutoff_ind = source_species.findAttribute("inner_cutoff"); + const int cutoff_radius_ind = source_species.findAttribute("cutoff_radius"); + const int spline_radius_ind = source_species.findAttribute("spline_radius"); + const int spline_npoints_ind = source_species.findAttribute("spline_npoints"); + const int lmax_ind = source_species.findAttribute("lmax"); + + for (int center_idx = 0; center_idx < AtomicCentersInfo.Ncenters; center_idx++) + { + const int my_GroupID = AtomicCentersInfo.GroupID[center_idx]; + if (inner_cutoff_ind >= 0) + AtomicCentersInfo.inner_cutoff[center_idx] = source_species(inner_cutoff_ind, my_GroupID); + if (cutoff_radius_ind >= 0) + AtomicCentersInfo.cutoff[center_idx] = source_species(cutoff_radius_ind, my_GroupID); + if (spline_radius_ind >= 0) + AtomicCentersInfo.spline_radius[center_idx] = source_species(spline_radius_ind, my_GroupID); + if (spline_npoints_ind >= 0) + AtomicCentersInfo.spline_npoints[center_idx] = source_species(spline_npoints_ind, my_GroupID); + if (lmax_ind >= 0) + AtomicCentersInfo.lmax[center_idx] = source_species(lmax_ind, my_GroupID); + } + } + /////////////////////////// + // Read the twist angles // + /////////////////////////// + primcell_kpoints.resize(NumTwists); + for (int ti = 0; ti < NumTwists; ti++) + { + std::ostringstream path; + path << "/electrons/kpoint_" << ti << "/reduced_k"; + TinyVector primcell_kpoints_DP; + H5File.read(primcell_kpoints_DP, path.str()); + primcell_kpoints[ti] = primcell_kpoints_DP; + } + if (qmc_common.use_density) + { + ////////////////////////////////////////////////////////// + // Only if it is bulk: If the density has not been set in TargetPtcl, + // and // the density is available, read it in and save it // in + // TargetPtcl. 
// + ////////////////////////////////////////////////////////// + if (TargetPtcl.getLattice().SuperCellEnum == SUPERCELL_BULK) + { + // FIXME: add support for more than one spin density + if (TargetPtcl.Density_G.empty()) + { + Array Density_r_DP; + TinyVector mesh; + H5File.read(TargetPtcl.DensityReducedGvecs, "/electrons/density/gvectors"); + int numG = TargetPtcl.DensityReducedGvecs.size(); +// Convert primitive G-vectors to supercell G-vectors +// Also, flip sign since ESHDF format uses opposite sign convention +#pragma omp parallel for + for (int iG = 0; iG < numG; iG++) + TargetPtcl.DensityReducedGvecs[iG] = -1 * dot(TileMatrix, TargetPtcl.DensityReducedGvecs[iG]); + app_log() << " Read " << numG << " density G-vectors.\n"; + for (int ispin = 0; ispin < NumSpins; ispin++) + { + std::ostringstream density_r_path, density_g_path; + density_r_path << "/electrons/density/spin_" << ispin << "/density_r"; + density_g_path << "/electrons/density/spin_" << ispin << "/density_g"; + H5File.readEntry(Density_r_DP, density_r_path.str()); + TargetPtcl.Density_r = Density_r_DP; + if (TargetPtcl.DensityReducedGvecs.size()) + { + app_log() << " EinsplineSetBuilder found density in " + "the HDF5 file.\n"; + std::vector density_G; + std::vector> Density_G_DP; + H5File.read(Density_G_DP, density_g_path.str()); + density_G.assign(Density_G_DP.begin(), Density_G_DP.end()); + if (!density_G.size()) + { + app_error() << " Density reduced G-vectors " + "defined, but not the" + << " density.\n"; + abort(); + } + else + { + if (ispin == 0) + TargetPtcl.Density_G = density_G; + else + for (int iG = 0; iG < density_G.size(); iG++) + TargetPtcl.Density_G[iG] += density_G[iG]; + } + } + } + } + ////////////////////////////////////////////////////////// + // If the density has not been set in TargetPtcl, and // + // the density is available, read it in and save it // + // in TargetPtcl. 
// + ////////////////////////////////////////////////////////// + // FIXME: add support for more than one spin potential + if (!TargetPtcl.VHXC_r[0].size()) + { + TinyVector mesh; + H5File.readEntry(TargetPtcl.VHXCReducedGvecs, "/electrons/VHXC/gvectors"); + int numG = TargetPtcl.VHXCReducedGvecs.size(); +// Convert primitive G-vectors to supercell G-vectors +// Also, flip sign since ESHDF format uses opposite sign convention +#pragma omp parallel for + for (int iG = 0; iG < numG; iG++) + TargetPtcl.VHXCReducedGvecs[iG] = -1 * dot(TileMatrix, TargetPtcl.VHXCReducedGvecs[iG]); + app_log() << " Read " << numG << " VHXC G-vectors.\n"; + for (int ispin = 0; ispin < NumSpins; ispin++) + { + Array VHXC_r_DP; + std::ostringstream VHXC_r_path, VHXC_g_path; + VHXC_r_path << "/electrons/VHXC/spin_" << ispin << "/VHXC_r"; + VHXC_g_path << "/electrons/VHXC/spin_" << ispin << "/VHXC_g"; + H5File.readEntry(VHXC_r_DP, VHXC_r_path.str()); + TargetPtcl.VHXC_r[ispin] = VHXC_r_DP; + if (TargetPtcl.VHXCReducedGvecs.size()) + { + app_log() << " EinsplineSetBuilder found VHXC in the " + "HDF5 file.\n"; + std::vector> VHXC_G_DP; + std::vector VHXC_G; + H5File.read(VHXC_G_DP, VHXC_g_path.str()); + VHXC_G.assign(VHXC_G_DP.begin(), VHXC_G_DP.end()); + if (!VHXC_G.size()) + { + app_error() << " VHXC reduced G-vectors defined, " + "but not the" + << " VHXC.\n"; + abort(); + } + else + TargetPtcl.VHXC_G[ispin] = VHXC_G; + } + } + } + } + } + else + { + app_log() << " Skip initialization of the density" << std::endl; + } + return true; +} + +template +void EinsplineSetBuilderT::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs) +{ + if (this->myComm->rank() != 0) + return; + + std::vector& SortBands(*FullBands[spin]); + SortBands.clear(); //??? can exit if SortBands is already made? 
+ int maxOrbs(0); + for (int ti = 0; ti < DistinctTwists.size(); ti++) + { + int tindex = DistinctTwists[ti]; + // First, read valence states + std::ostringstream ePath; + ePath << "/electrons/kpoint_" << tindex << "/spin_" << spin << "/eigenvalues"; + std::vector eigvals; + H5File.read(eigvals, ePath.str()); + for (int bi = 0; bi < NumBands; bi++) + { + BandInfo band; + band.TwistIndex = tindex; + band.BandIndex = bi; + band.MakeTwoCopies = MakeTwoCopies[ti]; + band.Energy = eigvals[bi]; + if (band.Energy > -1.0e100) + SortBands.push_back(band); + if (MakeTwoCopies[ti]) + maxOrbs += 2; + else + maxOrbs++; + } + } + + app_log() << SortBands.size() << " complex-valued orbitals supplied by h5 can be expanded up to " << maxOrbs + << " SPOs." << std::endl; + if (maxOrbs < numOrbs) + this->myComm->barrier_and_abort("EinsplineSetBuilder::OccupyBands_ESHDF user input requests " + "more orbitals than what the h5 file supplies."); + + // Now sort the bands by energy + if (sortBands == 2) + { + app_log() << "Sorting the bands by index now:\n"; + sort(SortBands.begin(), SortBands.end(), sortByIndex); + } + else if (sortBands == 1) + { + app_log() << "Sorting the bands now:\n"; + sort(SortBands.begin(), SortBands.end()); + } + + std::vector gsOcc(maxOrbs); + int N_gs_orbs = numOrbs; + int nocced(0); + for (int ti = 0; ti < SortBands.size(); ti++) + { + if (nocced < N_gs_orbs) + { + if (SortBands[ti].MakeTwoCopies && (N_gs_orbs - nocced > 1)) + { + nocced += 2; + gsOcc[ti] = 2; + } + else if ((SortBands[ti].MakeTwoCopies && (N_gs_orbs - nocced == 1)) || !SortBands[ti].MakeTwoCopies) + { + nocced += 1; + gsOcc[ti] = 1; + } + } + } + if (occ_format == "energy") + { + app_log() << " Occupying bands based on energy in mode " << (Occ.size() > 0 ? "\"excited\"" : "\"ground\"") + << std::endl; + // To get the occupations right. 
+ std::vector Removed(0, 0); + std::vector Added(0, 0); + for (int ien = 0; ien < Occ.size(); ien++) + { + if (Occ[ien] < 0) + Removed.push_back(-Occ[ien]); + else if (Occ[ien] > 0) + Added.push_back(Occ[ien]); + } + if (Added.size() - Removed.size() != 0) + { + app_log() << "need to add and remove same number of orbitals. " << Added.size() << " " << Removed.size() + << std::endl; + APP_ABORT("ChangedOccupations"); + } + std::vector DiffOcc(maxOrbs, 0); + // Probably a cleaner way to do this. + for (int i = 0; i < Removed.size(); i++) + DiffOcc[Removed[i] - 1] -= 1; + for (int i = 0; i < Added.size(); i++) + DiffOcc[Added[i] - 1] += 1; + std::vector SumOrb(SortBands.size(), 0); + int doi(0); + for (int i = 0; i < SumOrb.size(); i++) + { + if (SortBands[i].MakeTwoCopies) + { + SumOrb[i] = gsOcc[i] + DiffOcc[doi++]; + SumOrb[i] += DiffOcc[doi++]; + } + else + SumOrb[i] = gsOcc[i] + DiffOcc[doi++]; + } + std::vector ReOrderedBands; + std::vector RejectedBands; + for (int i = 0; i < SumOrb.size(); i++) + { + if (SumOrb[i] == 2) + { + SortBands[i].MakeTwoCopies = true; + ReOrderedBands.push_back(SortBands[i]); + } + else if (SumOrb[i] == 1) + { + SortBands[i].MakeTwoCopies = false; + ReOrderedBands.push_back(SortBands[i]); + } + else if (SumOrb[i] == 0) + { + SortBands[i].MakeTwoCopies = false; + RejectedBands.push_back(SortBands[i]); + } + else + { + app_log() << " Trying to add the same orbital (" << i << ") less than zero or more than 2 times." << std::endl; + APP_ABORT("Sorting Excitation"); + } + } + ReOrderedBands.insert(ReOrderedBands.end(), RejectedBands.begin(), RejectedBands.end()); + SortBands = ReOrderedBands; + } + else if (occ_format == "band") + { + app_log() << " Occupying bands based on (ti,bi) data." << std::endl; + if (Occ.size() != particle_hole_pairs * 4) + { + app_log() << " Need Occ = pairs*4. Occ is (ti,bi) of removed, then added." 
<< std::endl; + app_log() << Occ.size() << " " << particle_hole_pairs << std::endl; + APP_ABORT("ChangedOccupations"); + } + int cnt(0); + for (int ien = 0; ien < SortBands.size(); ien++) + { + if ((Occ[cnt] == SortBands[ien].TwistIndex) && (Occ[cnt + 1] == SortBands[ien].BandIndex)) + { + if (cnt < particle_hole_pairs * 2) + { + gsOcc[ien] -= 1; + cnt += 2; + app_log() << "removing orbital " << ien << std::endl; + } + else + { + gsOcc[ien] += 1; + app_log() << "adding orbital " << ien << std::endl; + cnt += 2; + } + } + } + std::vector ReOrderedBands; + std::vector RejectedBands; + for (int i = 0; i < SortBands.size(); i++) + { + if (gsOcc[i] == 2) + { + SortBands[i].MakeTwoCopies = true; + ReOrderedBands.push_back(SortBands[i]); + } + else if (gsOcc[i] == 1) + { + SortBands[i].MakeTwoCopies = false; + ReOrderedBands.push_back(SortBands[i]); + } + else if (gsOcc[i] == 0) + { + SortBands[i].MakeTwoCopies = false; + RejectedBands.push_back(SortBands[i]); + } + else + { + app_log() << " Trying to add the same orbital (" << i << ") less than zero or more than 2 times." << std::endl; + APP_ABORT("Sorting Excitation"); + } + } + ReOrderedBands.insert(ReOrderedBands.end(), RejectedBands.begin(), RejectedBands.end()); + SortBands = ReOrderedBands; + } + // for(int sw=0;sw +void EinsplineSetBuilderT::set_metadata(int numOrbs, + int twist_num_inp, + const TinyVector& twist_inp, + bool skipChecks) +{ + // 1. set a lot of internal parameters in the EinsplineSetBuilder class + // e.g. TileMatrix, use_real_splines_, DistinctTwists, MakeTwoCopies. + // 2. this is also where metadata for the orbitals are read from the + // wavefunction hdf5 file + // and broadcast to MPI groups. Variables broadcasted are listed in + // EinsplineSetBuilderCommon.cpp + // EinsplineSetBuilder::BroadcastOrbitalInfo() + // + + Timer orb_info_timer; + // The tiling can be set by a simple vector, (e.g. 2x2x2), or by a + // full 3x3 matrix of integers. 
If the tilematrix was not set in + // the input file... + bool matrixNotSet = true; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + matrixNotSet = matrixNotSet && (TileMatrix(i, j) == 0); + // then set the matrix to identity. + if (matrixNotSet) + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + TileMatrix(i, j) = (i == j) ? 1 : 0; + if (this->myComm->rank() == 0) + { + std::array buff; + int length = std::snprintf(buff.data(), buff.size(), + " TileMatrix = \n [ %2d %2d %2d\n %2d %2d %2d\n %2d %2d %2d " + "]\n", + TileMatrix(0, 0), TileMatrix(0, 1), TileMatrix(0, 2), TileMatrix(1, 0), TileMatrix(1, 1), + TileMatrix(1, 2), TileMatrix(2, 0), TileMatrix(2, 1), TileMatrix(2, 2)); + if (length < 0) + throw std::runtime_error("Error converting TileMatrix to a string"); + app_log() << std::string_view(buff.data(), length); + } + if (numOrbs == 0) + this->myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet You must specify the number of " + "orbitals in the input file."); + else + app_log() << " Reading " << numOrbs << " orbitals from HDF5 file.\n"; + + ///////////////////////////////////////////////////////////////// + // Read the basic orbital information, without reading all the // + // orbitals themselves. 
// + ///////////////////////////////////////////////////////////////// + orb_info_timer.restart(); + if (this->myComm->rank() == 0) + if (!ReadOrbitalInfo(skipChecks)) + throw std::runtime_error("EinsplineSetBuilder::set_metadata Error " + "reading orbital info from HDF5 file."); + app_log() << "TIMER EinsplineSetBuilder::ReadOrbitalInfo " << orb_info_timer.elapsed() << std::endl; + this->myComm->barrier(); + + orb_info_timer.restart(); + BroadcastOrbitalInfo(); + app_log() << "TIMER EinsplineSetBuilder::BroadcastOrbitalInfo " << orb_info_timer.elapsed() << std::endl; + app_log().flush(); + + // setup primitive cell and supercell + PrimCell.set(Lattice); + SuperCell.set(SuperLattice); + GGt = dot(transpose(PrimCell.G), PrimCell.G); + + // Now, analyze the k-point mesh to figure out the what k-points are needed + AnalyzeTwists2(twist_num_inp, twist_inp); +} + +template +std::unique_ptr> EinsplineSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) +{ + // use 2 bohr as the default when truncated orbitals are used based on the + // extend of the ions + int numOrbs = 0; + int sortBands(1); + int spinSet = 0; + bool skipChecks = false; + int twist_num_inp = TWISTNUM_NO_INPUT; + TinyVector twist_inp(TWIST_NO_INPUT); + + std::string sourceName; + std::string spo_prec("double"); + std::string truncate("no"); + std::string hybrid_rep("no"); + std::string skip_checks("no"); + std::string use_einspline_set_extended("no"); // use old spline library for high-order derivatives, e.g. 
needed + // for backflow optimization + std::string useGPU; + std::string GPUsharing = "no"; + std::string spo_object_name; + + ScopedTimer spo_timer_scope(createGlobalTimer("einspline::CreateSPOSetFromXML", timer_level_medium)); + + { + TinyVector TileFactor_do_not_use; + OhmmsAttributeSet a; + a.add(H5FileName, "href"); + a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED); + a.add(sortBands, "sort"); + a.add(TileMatrix, "tilematrix"); + a.add(twist_num_inp, "twistnum"); + a.add(twist_inp, "twist"); + a.add(sourceName, "source"); + a.add(MeshFactor, "meshfactor"); + a.add(hybrid_rep, "hybridrep"); + a.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values); + a.add(GPUsharing, + "gpusharing"); // split spline across GPUs visible per rank + a.add(spo_prec, "precision"); + a.add(truncate, "truncate"); + a.add(this->myName, "tag"); + a.add(skip_checks, "skip_checks"); + + a.put(XMLRoot); + a.add(numOrbs, "size"); + a.add(numOrbs, "norbs"); + a.add(spinSet, "spindataset"); + a.add(spinSet, "group"); + a.put(cur); + + if (this->myName.empty()) + this->myName = "einspline"; + } + + if (skip_checks == "yes") + skipChecks = true; + + auto pit(ParticleSets.find(sourceName)); + if (pit == ParticleSets.end()) + this->myComm->barrier_and_abort("Einspline needs the source particleset"); + else + SourcePtcl = pit->second.get(); + + /////////////////////////////////////////////// + // Read occupation information from XML file // + /////////////////////////////////////////////// + const std::vector last_occ(Occ); + Occ.resize(0, 0); // correspond to ground + bool NewOcc(false); + + { + OhmmsAttributeSet oAttrib; + oAttrib.add(spinSet, "spindataset"); + oAttrib.add(spo_object_name, "name"); + oAttrib.add(spo_object_name, "id"); + oAttrib.put(cur); + } + + xmlNodePtr spo_cur = cur; + cur = cur->children; + while (cur != NULL) + { + std::string cname((const char*)(cur->name)); + if (cname == "occupation") + { + std::string occ_mode("ground"); + occ_format = "energy"; + 
particle_hole_pairs = 0; + OhmmsAttributeSet oAttrib; + oAttrib.add(occ_mode, "mode"); + oAttrib.add(spinSet, "spindataset"); + oAttrib.add(occ_format, "format"); + oAttrib.add(particle_hole_pairs, "pairs"); + oAttrib.put(cur); + if (occ_mode == "excited") + putContent(Occ, cur); + else if (occ_mode != "ground") + this->myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet Only ground state " + "occupation " + "currently supported in EinsplineSetBuilder."); + } + cur = cur->next; + } + if (Occ != last_occ) + { + NewOcc = true; + } + else + NewOcc = false; +#if defined(MIXED_PRECISION) + app_log() << "\t MIXED_PRECISION=1 Overwriting the einspline storage to " + "single precision.\n"; + spo_prec = "single"; // overwrite +#endif + H5OrbSet aset(H5FileName, spinSet, numOrbs); + const auto iter = SPOSetMap.find(aset); + if ((iter != SPOSetMap.end()) && (!NewOcc)) + app_warning() << "!!!!!!! Identical SPOSets are detected by EinsplineSetBuilder! " + "Implicit sharing one SPOSet for spin-up and spin-down " + "electrons has been removed. " + "Each determinant creates its own SPOSet with dedicated memory " + "for spline coefficients. " + "To avoid increasing the memory footprint of spline " + "coefficients, " + "create a single SPOset outside the determinantset using " + "'sposet_collection' " + "and reference it by name on the determinant line." 
+ << std::endl; + + if (FullBands[spinSet] == 0) + FullBands[spinSet] = std::make_unique>(); + + // Ensure the first SPO set must be spinSet==0 + // to correctly initialize key data of EinsplineSetBuilder + if (SPOSetMap.size() == 0 && spinSet != 0) + this->myComm->barrier_and_abort("The first SPO set must have spindataset=\"0\""); + + // set the internal parameters + if (spinSet == 0) + set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); + + ////////////////////////////////// + // Create the OrbitalSet object + ////////////////////////////////// + Timer mytimer; + mytimer.restart(); + OccupyBands(spinSet, sortBands, numOrbs, skipChecks); + if (spinSet == 0) + TileIons(); + + bool use_single = (spo_prec == "single" || spo_prec == "float"); + + // safeguard for a removed feature + if (truncate == "yes") + this->myComm->barrier_and_abort("The 'truncate' feature of spline SPO has been removed. Please use " + "hybrid orbital representation."); + + createBsplineReader(use_single, hybrid_rep == "yes", useGPU); + + MixedSplineReader->setCommon(XMLRoot); + // temporary disable the following function call, Ye Luo + // RotateBands_ESHDF(spinSet, + // dynamic_cast >*>(OrbitalSet)); + bcastSortBands(spinSet, NumDistinctOrbitals, this->myComm->rank() == 0); + auto OrbitalSet = MixedSplineReader->create_spline_set(spinSet, spo_cur); + if (!OrbitalSet) + this->myComm->barrier_and_abort("Failed to create SPOSet*"); + app_log() << "Time spent in creating B-spline SPOs " << mytimer.elapsed() << "sec" << std::endl; + OrbitalSet->finalizeConstruction(); + SPOSetMap[aset] = OrbitalSet.get(); + return OrbitalSet; +} + +template +void EinsplineSetBuilderT::createBsplineReader(bool useSingle, bool hybridRep, const std::string& useGPU) +{ + if (use_real_splines_) + { + // if(TargetPtcl.Lattice.SuperCellEnum != SUPERCELL_BULK && + // truncate=="yes") + if (MixedSplineReader == 0) + { + if (useSingle) + MixedSplineReader = createBsplineRealSingleT(this, hybridRep, useGPU); + else + 
MixedSplineReader = createBsplineRealDoubleT(this, hybridRep, useGPU); + } + } + else + { + if (MixedSplineReader == 0) + { + if (useSingle) + MixedSplineReader = createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = createBsplineComplexDoubleT(this, hybridRep, useGPU); + } + } +} + +#ifdef QMC_COMPLEX +template<> +void EinsplineSetBuilderT>::createBsplineReader(bool useSingle, + bool hybridRep, + const std::string& useGPU) +{ + if (MixedSplineReader == 0) + { + if (useSingle) + MixedSplineReader = createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = createBsplineComplexDoubleT(this, hybridRep, useGPU); + } +} + +template<> +void EinsplineSetBuilderT>::createBsplineReader(bool useSingle, + bool hybridRep, + const std::string& useGPU) +{ + if (MixedSplineReader == 0) + { + if (useSingle) + MixedSplineReader = createBsplineComplexSingleT(this, hybridRep, useGPU); + else + MixedSplineReader = createBsplineComplexDoubleT(this, hybridRep, useGPU); + } +} +#endif + +template +std::unique_ptr> EinsplineSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) +{ + if (MixedSplineReader == 0) + this->myComm->barrier_and_abort("EinsplineSetExtended cannot create a SPOSet"); + + std::string aname; + int spinSet(0); + OhmmsAttributeSet a; + a.add(spinSet, "spindataset"); + a.add(spinSet, "group"); + a.put(cur); + + // allow only non-overlapping index sets and use the max index as the + // identifier + int norb = input_info.max_index(); + H5OrbSet aset(H5FileName, spinSet, norb); + + auto bspline_zd = MixedSplineReader->create_spline_set(spinSet, cur, input_info); + if (bspline_zd) + SPOSetMap[aset] = bspline_zd.get(); + return bspline_zd; +} + +template +bool EinsplineSetBuilderT::ReadOrbitalInfo(bool skipChecks) +{ + if (!H5File.open(H5FileName, H5F_ACC_RDONLY)) + { + app_error() << "Could not open HDF5 file \"" << H5FileName << "\" in EinsplineSetBuilder::ReadOrbitalInfo.\n"; + return false; + } + + // 
Read format + std::string format; + H5File.read(format, "/format"); + H5File.read(Version, "/version"); + app_log() << " HDF5 orbital file version " << Version[0] << "." << Version[1] << "." << Version[2] << "\n"; + if (format.find("ES") < format.size()) + { + Format = ESHDF; + return ReadOrbitalInfo_ESHDF(skipChecks); + } + + app_error() << "EinsplineSetBuilder::ReadOrbitalInfo too old h5 file which " + "is not in ESHDF format! Regenerate the h5 file"; + return false; +} + +template +bool EinsplineSetBuilderT::ReadGvectors_ESHDF() +{ + bool root = this->myComm->rank() == 0; + // this is always ugly + MeshSize = 0; + int hasPsig = 1; + if (root) + { + H5File.readEntry(MeshSize, "/electrons/psi_r_mesh"); + H5File.readEntry(MeshSize, "/electrons/mesh"); + } + this->myComm->bcast(MeshSize); + hasPsig = (MeshSize[0] == 0); + if (hasPsig) + { + int nallowed = 257; + int allowed[] = {72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, + 160, 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, + 320, 324, 360, 375, 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, + 576, 600, 625, 640, 648, 675, 720, 729, 750, 768, 800, 810, 864, 900, + 960, 972, 1000, 1024, 1080, 1125, 1152, 1200, 1215, 1250, 1280, 1296, 1350, 1440, + 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875, 1920, 1944, 2000, 2025, 2048, 2160, + 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592, 2700, 2880, 2916, 3000, 3072, 3125, + 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840, 3888, 4000, 4050, 4096, 4320, 4374, + 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400, 5625, 5760, 5832, 6000, 6075, 6144, + 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290, 7500, 7680, 7776, 8000, 8100, 8192, + 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000, 10125, 10240, 10368, 10800, 10935, 11250, + 11520, 11664, 12000, 12150, 12288, 12500, 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, + 15360, 15552, 15625, 16000, 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, + 19440, 19683, 
20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576, + 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720, 31104, 31250, + 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864, 37500, 38400, 38880, 39366, + 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080, 46656, 46875, 48000, 48600, 49152, 50000, + 50625, 51200, 51840, 52488, 54000, 54675, 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, + 62208, 62500, 64000, 64800, 65536}; + MaxNumGvecs = 0; + // std::set > Gset; + // Read k-points for all G-vectors and take the union + TinyVector maxIndex(0, 0, 0); + Gvecs.resize(NumTwists); + { + int numg = 0; + if (root) + { + std::ostringstream Gpath; + Gpath << "/electrons/kpoint_0/gvectors"; + H5File.read(Gvecs[0], Gpath.str()); + numg = Gvecs[0].size(); + } + this->myComm->bcast(numg); + if (!root) + Gvecs[0].resize(numg); + this->myComm->bcast(Gvecs[0]); + MaxNumGvecs = Gvecs[0].size(); + for (int ig = 0; ig < Gvecs[0].size(); ig++) + { + maxIndex[0] = std::max(maxIndex[0], std::abs(Gvecs[0][ig][0])); + maxIndex[1] = std::max(maxIndex[1], std::abs(Gvecs[0][ig][1])); + maxIndex[2] = std::max(maxIndex[2], std::abs(Gvecs[0][ig][2])); + } + // for (int ig=0; ig=2 up to 65536 + int* ix = std::lower_bound(allowed, allowed + nallowed, MeshSize[0]); + int* iy = std::lower_bound(allowed, allowed + nallowed, MeshSize[1]); + int* iz = std::lower_bound(allowed, allowed + nallowed, MeshSize[2]); + MeshSize[0] = (MeshSize[0] > 128) ? *ix : (MeshSize[0] + MeshSize[0] % 2); + MeshSize[1] = (MeshSize[1] > 128) ? *iy : (MeshSize[1] + MeshSize[1] % 2); + MeshSize[2] = (MeshSize[2] > 128) ? *iz : (MeshSize[2] + MeshSize[2] % 2); + if (Version[0] < 2) + { + // get the map for each twist, but use the MeshSize from kpoint_0 + app_log() << " ESHDF::Version " << Version << std::endl; + app_log() << " Assumes distinct Gvecs set for different twists. " + "Regenerate orbital files using updated QE." 
+ << std::endl; + for (int k = 0; k < DistinctTwists.size(); ++k) + { + int ik = DistinctTwists[k]; + if (ik == 0) + continue; // already done + int numg = 0; + if (root) + { + std::ostringstream Gpath; + Gpath << "/electrons/kpoint_" << ik << "/gvectors"; + H5File.read(Gvecs[ik], Gpath.str()); + numg = Gvecs[ik].size(); + } + this->myComm->bcast(numg); + if (numg == 0) + { + // copy kpoint_0, default + Gvecs[ik] = Gvecs[0]; + } + else + { + if (numg != MaxNumGvecs) + { + std::ostringstream o; + o << "Twist " << ik << ": The number of Gvecs is different from kpoint_0." + << " This is not supported anymore. Rerun " + "pw2qmcpack.x or equivalent"; + APP_ABORT(o.str()); + } + if (!root) + Gvecs[ik].resize(numg); + this->myComm->bcast(Gvecs[ik]); + } + } + } + } + app_log() << "B-spline mesh factor is " << MeshFactor << std::endl; + app_log() << "B-spline mesh size is (" << MeshSize[0] << ", " << MeshSize[1] << ", " << MeshSize[2] << ")\n"; + app_log() << "Maxmimum number of Gvecs " << MaxNumGvecs << std::endl; + app_log().flush(); + return hasPsig; +} + +//#ifndef QMC_COMPLEX +template class EinsplineSetBuilderT; +template class EinsplineSetBuilderT; +#ifdef QMC_COMPLEX +template class EinsplineSetBuilderT>; +template class EinsplineSetBuilderT>; +#endif +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSetBuilderT.h new file mode 100644 index 0000000000..822354a164 --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.h @@ -0,0 +1,301 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file EinsplineSetBuilder.h + * + * Builder class for einspline-based SPOSet objects. + */ +#ifndef QMCPLUSPLUS_EINSPLINE_SET_BUILDERT_H +#define QMCPLUSPLUS_EINSPLINE_SET_BUILDERT_H + +#include "QMCWaveFunctions/BandInfo.h" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" + +#include +#include + +#define PW_COEFF_NORM_TOLERANCE 1e-6 + +class Communicate; + +namespace qmcplusplus +{ +/// forward declaration of BsplineReaderBase +template +class BsplineReaderBaseT; + +// Helper needed for TwistMap +struct Int3less +{ + bool operator()(const TinyVector& a, const TinyVector& b) const + { + if (a[0] > b[0]) + return false; + if (a[0] < b[0]) + return true; + if (a[1] > b[1]) + return false; + if (a[1] < b[1]) + return true; + if (a[2] > b[2]) + return false; + if (a[2] < b[2]) + return true; + return false; + } +}; +struct Int4less +{ + bool operator()(const TinyVector& a, const TinyVector& b) const + { + for (int i = 0; i < 4; i++) + { + if (a[i] > b[i]) + return false; + if (a[i] < b[i]) + return true; + } + return false; + } +}; + +/** construct a name for spline SPO set + */ +struct H5OrbSet +{ + /// index for the spin set + int SpinSet; + /// number of orbitals that belong to 
this set + int NumOrbs; + /// name of the HDF5 file + std::filesystem::path FileName; + /** true if a < b + * + * The ordering + * - name + * - spin set + * - number of orbitals + */ + bool operator()(const H5OrbSet& a, const H5OrbSet& b) const + { + if (a.FileName == b.FileName) + { + if (a.SpinSet == b.SpinSet) + return a.NumOrbs < b.NumOrbs; + else + return a.SpinSet < b.SpinSet; + } + else + return a.FileName < b.FileName; + } + + H5OrbSet(std::filesystem::path name, int spinSet, int numOrbs) + : SpinSet(spinSet), NumOrbs(numOrbs), FileName(std::move(name)) + {} + H5OrbSet() = default; +}; + +/** EinsplineSet builder + */ +template +class EinsplineSetBuilderT : public SPOSetBuilderT +{ +public: + static constexpr auto DIM = ParticleSetT::DIM; + + using PSetMap = std::map>>; + using UnitCellType = CrystalLattice::Scalar_t, DIM>; + using RealType = typename SPOSetBuilderT::RealType; + using PosType = typename SPOSetBuilderT::PosType; + using ComplexType = typename SPOSetT::ComplexType; + + /// reference to the particleset pool + const PSetMap& ParticleSets; + /// quantum particle set + ParticleSetT& TargetPtcl; + /// ionic system + ParticleSetT* SourcePtcl; + + /** Helper vector for sorting bands + */ + std::vector>> FullBands; + + /// reader to use BsplineReaderBase + std::unique_ptr> MixedSplineReader; + + /// This is true if we have the orbital derivatives w.r.t. 
the ion positions + bool HaveOrbDerivs; + /// root XML node with href, sort, tilematrix, twistnum, source, + /// precision,truncate,version + xmlNodePtr XMLRoot; + + std::map*, H5OrbSet> SPOSetMap; + + /// constructor + EinsplineSetBuilderT(ParticleSetT& p, const PSetMap& psets, Communicate* comm, xmlNodePtr cur); + + /// destructor + ~EinsplineSetBuilderT() override; + + /** initialize the Antisymmetric wave function for electrons + * @param cur the current xml node + */ + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + + /** initialize with the existing SPOSet */ + std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) override; + + ////////////////////////////////////// + // HDF5-related data and functions // + ////////////////////////////////////// + hdf_archive H5File; + std::filesystem::path H5FileName; + // HDF5 orbital file version + typedef enum + { + QMCPACK, + ESHDF + } FormatType; + FormatType Format; + TinyVector Version; + std::string parameterGroup, ionsGroup, eigenstatesGroup; + std::vector Occ; + bool ReadOrbitalInfo(bool skipChecks = false); + bool ReadOrbitalInfo_ESHDF(bool skipChecks = false); + void BroadcastOrbitalInfo(); + bool CheckLattice(); + + /** read gvectors for each twist + * @return true, if psi_g is found + */ + bool ReadGvectors_ESHDF(); + + Tensor Lattice, RecipLattice, LatticeInv, SuperLattice, GGt; + UnitCellType SuperCell, PrimCell, PrimCellInv; + int NumBands, NumElectrons, NumSpins, NumTwists; + int MaxNumGvecs; + double MeshFactor; + RealType MatchingTol; + TinyVector MeshSize; + std::vector>> Gvecs; + + Vector IonTypes; + Vector> IonPos; + // mapping the ions in the supercell to the primitive cell + std::vector Super2Prim; + + ///////////////////////////// + // Twist angle information // + ///////////////////////////// + // The "true" twist number after analyzing twistnum, twist XML input and h5 + int twist_num_; + // primitive cell k-points from DFT calculations + std::vector> 
primcell_kpoints; + // primitive cell to supercell tiling matrix + Tensor TileMatrix; + // This vector stores which twist indices will be used by this clone + std::vector> UseTwists; + std::vector IncludeTwists, DistinctTwists; + /// if false, splines are conceptually complex valued + bool use_real_splines_; + int NumDistinctOrbitals; + // This is true if the corresponding twist in DistinctTwists should + // should be used to generate two distinct orbitals from the real and + // imaginary parts. + std::vector MakeTwoCopies; + // This maps a 3-integer twist index into the twist number in the file + std::map, int, Int3less> TwistMap; + + bool TwistPair(PosType a, PosType b) const; + void TileIons(); + void OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks = false); + void OccupyBands_ESHDF(int spin, int sortBands, int numOrbs); + + //////////////////////////////// + // Atomic orbital information // + //////////////////////////////// + struct CenterInfo + { + std::vector lmax, spline_npoints, GroupID; + std::vector spline_radius, cutoff, inner_cutoff, non_overlapping_radius; + std::vector> ion_pos; + int Ncenters; + + CenterInfo() : Ncenters(0){}; + + void resize(int ncenters) + { + Ncenters = ncenters; + lmax.resize(ncenters, -1); + spline_npoints.resize(ncenters, -1); + GroupID.resize(ncenters, 0); + spline_radius.resize(ncenters, -1.0); + inner_cutoff.resize(ncenters, -1.0); + non_overlapping_radius.resize(ncenters, -1.0); + cutoff.resize(ncenters, -1.0); + ion_pos.resize(ncenters); + } + } AtomicCentersInfo; + + // This returns the path in the HDF5 file to the group for orbital + // with twist ti and band bi + std::string OrbitalPath(int ti, int bi); + + ///////////////////////////////////////////////////////////// + // Information to avoid storing the same orbitals twice in // + // spin-restricted calculations. 
// + ///////////////////////////////////////////////////////////// + int LastSpinSet, NumOrbitalsRead; + + std::string occ_format; + int particle_hole_pairs; + bool makeRotations; + +protected: + /** broadcast SortBands + * @param N number of states + * @param root true if it is the i/o node + */ + void bcastSortBands(int splin, int N, bool root); + + /** a specific but clean code path in createSPOSetFromXML, for PBC, double, + * ESHDF + * @param cur the current xml node + */ + void set_metadata(int numOrbs, + int twist_num_inp, + const TinyVector& twist_inp, + bool skipChecks = false); + + void createBsplineReader(bool useSingle, bool hybridRep, const std::string& useGPU); + + /** analyze twists of orbitals in h5 and determine twist_num_ + * @param twist_num_inp twistnum XML input + * @param twist_inp twist XML input + */ + void AnalyzeTwists2(const int twist_num_inp, const TinyVector& twist_inp); + + /// twistnum_inp == -9999 to indicate no given input after parsing XML + static constexpr int TWISTNUM_NO_INPUT = -9999; + /// twist_inp[i] <= -9999 to indicate no given input after parsing XML + static constexpr double TWIST_NO_INPUT = -9999; +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp similarity index 62% rename from src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp rename to src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp index 65037960be..94cc8ceec0 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp +++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.cpp @@ -17,32 +17,34 @@ ////////////////////////////////////////////////////////////////////////////////////// -#include "EinsplineSpinorSetBuilder.h" -#include "QMCWaveFunctions/SpinorSet.h" +#include "EinsplineSpinorSetBuilderT.h" +#include "QMCWaveFunctions/SpinorSetT.h" #include "OhmmsData/AttributeSet.h" #include 
"Message/CommOperators.h" #include "Utilities/Timer.h" -#include "einspline_helper.hpp" -#include "BsplineReaderBase.h" -#include "createBsplineReader.h" +#include "QMCWaveFunctions/BsplineFactory/einspline_helper.hpp" +#include "QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h" +#include "QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h" +#include "QMCWaveFunctions/SpinorSet.h" namespace qmcplusplus { -std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> EinsplineSpinorSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { int numOrbs = 0; int sortBands(1); int spinSet = 0; int spinSet2 = 1; - int twist_num_inp = TWISTNUM_NO_INPUT; - TinyVector twist_inp(TWIST_NO_INPUT); + int twist_num_inp = this->TWISTNUM_NO_INPUT; + TinyVector twist_inp(this->TWIST_NO_INPUT); //There have to be two "spin states"... one for the up channel and one for the down channel. // We force this for spinors and manually resize states and FullBands. - states.clear(); - states.resize(2); + this->states.clear(); + this->states.resize(2); - FullBands.resize(2); + this->FullBands.resize(2); SPOSet* UpOrbitalSet; std::string sourceName; @@ -56,41 +58,41 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt { OhmmsAttributeSet a; TinyVector TileFactor_do_not_use; - a.add(H5FileName, "href"); + a.add(this->H5FileName, "href"); a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED); a.add(sortBands, "sort"); - a.add(TileMatrix, "tilematrix"); + a.add(this->TileMatrix, "tilematrix"); a.add(twist_num_inp, "twistnum"); a.add(twist_inp, "twist"); a.add(sourceName, "source"); - a.add(MeshFactor, "meshfactor"); + a.add(this->MeshFactor, "meshfactor"); a.add(hybrid_rep, "hybridrep"); a.add(spo_prec, "precision"); a.add(truncate, "truncate"); - a.add(myName, "tag"); + a.add(this->myName, "tag"); - a.put(XMLRoot); + a.put(this->XMLRoot); a.add(numOrbs, "size"); a.add(numOrbs, "norbs"); a.add(spinSet, "spindataset"); 
a.add(spinSet, "group"); a.put(cur); - if (myName.empty()) - myName = "einspline.spinor"; + if (this->myName.empty()) + this->myName = "einspline.spinor"; } - auto pit(ParticleSets.find(sourceName)); - if (pit == ParticleSets.end()) - myComm->barrier_and_abort("Einspline needs the source particleset"); + auto pit(this->ParticleSets.find(sourceName)); + if (pit == this->ParticleSets.end()) + this->myComm->barrier_and_abort("Einspline needs the source particleset"); else - SourcePtcl = pit->second.get(); + this->SourcePtcl = pit->second.get(); /////////////////////////////////////////////// // Read occupation information from XML file // /////////////////////////////////////////////// - const std::vector last_occ(Occ); - Occ.resize(0, 0); // correspond to ground + const std::vector last_occ(this->Occ); + this->Occ.resize(0, 0); // correspond to ground bool NewOcc(false); { @@ -109,33 +111,33 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt if (cname == "occupation") { std::string occ_mode("ground"); - occ_format = "energy"; - particle_hole_pairs = 0; + this->occ_format = "energy"; + this->particle_hole_pairs = 0; OhmmsAttributeSet oAttrib; oAttrib.add(occ_mode, "mode"); oAttrib.add(spinSet, "spindataset"); - oAttrib.add(occ_format, "format"); - oAttrib.add(particle_hole_pairs, "pairs"); + oAttrib.add(this->occ_format, "format"); + oAttrib.add(this->particle_hole_pairs, "pairs"); oAttrib.put(cur); if (occ_mode == "excited") - putContent(Occ, cur); + putContent(this->Occ, cur); else if (occ_mode != "ground") - myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet Only ground state occupation currently " - "supported in EinsplineSetBuilder."); + this->myComm->barrier_and_abort("EinsplineSetBuilder::createSPOSet Only ground state occupation currently " + "supported in EinsplineSetBuilder."); } cur = cur->next; } - if (Occ != last_occ) + if (this->Occ != last_occ) { NewOcc = true; } else NewOcc = false; - H5OrbSet aset(H5FileName, 
spinSet, numOrbs); - const auto iter = SPOSetMap.find(aset); - if ((iter != SPOSetMap.end()) && (!NewOcc)) + H5OrbSet aset(this->H5FileName, spinSet, numOrbs); + const auto iter = this->SPOSetMap.find(aset); + if ((iter != this->SPOSetMap.end()) && (!NewOcc)) app_warning() << "!!!!!!! Identical SPOSets are detected by EinsplineSpinorSetBuilder! " "Implicit sharing one SPOSet for spin-up and spin-down electrons has been removed. " "Each determinant creates its own SPOSet with dedicated memory for spline coefficients. " @@ -144,32 +146,32 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt "and reference it by name on the determinant line." << std::endl; - if (FullBands[spinSet] == 0) - FullBands[spinSet] = std::make_unique>(); + if (this->FullBands[spinSet] == nullptr) + this->FullBands[spinSet] = std::make_unique>(); - if (FullBands[spinSet2] == 0) - FullBands[spinSet2] = std::make_unique>(); + if (this->FullBands[spinSet2] == nullptr) + this->FullBands[spinSet2] = std::make_unique>(); //This is to skip checks on ion-ID's, spin types, etc. If we've made it here, we assume we know better //than Einspline on what the data means... bool skipChecks = true; - set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); + this->set_metadata(numOrbs, twist_num_inp, twist_inp, skipChecks); ////////////////////////////////// // Create the OrbitalSet object ////////////////////////////////// Timer mytimer; mytimer.restart(); - OccupyBands(spinSet, sortBands, numOrbs, skipChecks); + this->OccupyBands(spinSet, sortBands, numOrbs, skipChecks); if (spinSet == 0) - TileIons(); + this->TileIons(); bool use_single = (spo_prec == "single" || spo_prec == "float"); // safeguard for a removed feature if (truncate == "yes") - myComm->barrier_and_abort( + this->myComm->barrier_and_abort( "The 'truncate' feature of spline SPO has been removed. 
Please use hybrid orbital representation."); std::string useGPU("no"); @@ -179,7 +181,7 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt if (MixedSplineReader == 0) { if (use_single) - MixedSplineReader = createBsplineRealSingle(this, hybrid_rep == "yes", useGPU); + MixedSplineReader = createBsplineRealSingleT(this, hybrid_rep == "yes", useGPU); else MixedSplineReader = createBsplineRealDouble(this, hybrid_rep == "yes", useGPU); } @@ -187,30 +189,30 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt else #endif { - if (MixedSplineReader == 0) + if (this->MixedSplineReader == nullptr) { if (use_single) - MixedSplineReader = createBsplineComplexSingle(this, hybrid_rep == "yes", useGPU); + this->MixedSplineReader = createBsplineComplexSingleT(this, hybrid_rep == "yes", useGPU); else - MixedSplineReader = createBsplineComplexDouble(this, hybrid_rep == "yes", useGPU); + this->MixedSplineReader = createBsplineComplexDoubleT(this, hybrid_rep == "yes", useGPU); } } - MixedSplineReader->setCommon(XMLRoot); + this->MixedSplineReader->setCommon(this->XMLRoot); //Norm for spinor wavefunctions is different from SPO's by a factor of sqrt(2). Disable the unit norm check. - MixedSplineReader->setCheckNorm(false); + this->MixedSplineReader->setCheckNorm(false); //Set no rotation to the orbitals - MixedSplineReader->setRotate(false); + this->MixedSplineReader->setRotate(false); //Make the up spin set. - bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0); - auto bspline_zd_u = MixedSplineReader->create_spline_set(spinSet, spo_cur); + this->bcastSortBands(spinSet, this->NumDistinctOrbitals, this->myComm->rank() == 0); + auto bspline_zd_u = this->MixedSplineReader->create_spline_set(spinSet, spo_cur); bspline_zd_u->finalizeConstruction(); //Make the down spin set. 
- OccupyBands(spinSet2, sortBands, numOrbs, skipChecks); - bcastSortBands(spinSet2, NumDistinctOrbitals, myComm->rank() == 0); - auto bspline_zd_d = MixedSplineReader->create_spline_set(spinSet2, spo_cur); + this->OccupyBands(spinSet2, sortBands, numOrbs, skipChecks); + this->bcastSortBands(spinSet2, this->NumDistinctOrbitals, this->myComm->rank() == 0); + auto bspline_zd_d = this->MixedSplineReader->create_spline_set(spinSet2, spo_cur); bspline_zd_d->finalizeConstruction(); //register with spin set and we're off to the races. @@ -218,4 +220,10 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt spinor_set->set_spos(std::move(bspline_zd_u), std::move(bspline_zd_d)); return spinor_set; }; + +#ifndef MIXED_PRECISION +template class EinsplineSpinorSetBuilderT>; +#else +template class EinsplineSpinorSetBuilderT>; +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h new file mode 100644 index 0000000000..26cc29e559 --- /dev/null +++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h @@ -0,0 +1,54 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file EinsplineSpinorSetBuilderT.h + * + * Derives EinsplineSetBuilderT. Overrides the createSPOSetFromXML method to read an up and down channel from hdf5 + * and then construct an appropriate einspline spinor set object. 
+ * + */ +#ifndef QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDERT_H +#define QMCPLUSPLUS_EINSPLINE_SPINORSET_BUILDERT_H + +#include "QMCWaveFunctions/SPOSetT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +class Communicate; + +namespace qmcplusplus +{ + +template +class EinsplineSpinorSetBuilderT : public EinsplineSetBuilderT +{ + using ParticleSet = ParticleSetT; + using SPOSet = SPOSetT; + using PSetMap = std::map>; + +public: + ///constructor + EinsplineSpinorSetBuilderT(ParticleSet& p, const PSetMap& psets, Communicate* comm, xmlNodePtr cur) + : EinsplineSetBuilderT(p, psets, comm, cur){}; + + ///destructor + ~EinsplineSpinorSetBuilderT() override{}; + + /** initialize the Antisymmetric wave function for electrons + * @param cur the current xml node + */ + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; +}; + +} // namespace qmcplusplus + + +#endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp deleted file mode 100644 index 3727a1e2e6..0000000000 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.cpp +++ /dev/null @@ -1,264 +0,0 @@ -#include "FreeOrbital.h" - -namespace qmcplusplus -{ -FreeOrbital::FreeOrbital(const std::string& my_name, const std::vector& kpts_cart) - : SPOSet(my_name), - kvecs(kpts_cart), -#ifdef QMC_COMPLEX - mink(0), // first k at twist may not be 0 -#else - mink(1), // treat k=0 as special case -#endif - maxk(kpts_cart.size()) -{ -#ifdef QMC_COMPLEX - OrbitalSetSize = maxk; -#else - OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split -#endif - k2neg.resize(maxk); - for (int ik = 0; ik < maxk; ik++) - k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); -} - -FreeOrbital::~FreeOrbital() {} - -void FreeOrbital::evaluateVGL(const ParticleSet& P, int iat, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) -{ - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { 
- sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - pvec[ik] = ValueType(coskr, sinkr); - dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; - dpvec[j1] = -sinkr * kvecs[ik]; - dpvec[j2] = coskr * kvecs[ik]; - d2pvec[j1] = k2neg[ik] * coskr; - d2pvec[j2] = k2neg[ik] * sinkr; -#endif - } -#ifndef QMC_COMPLEX - pvec[0] = 1.0; - dpvec[0] = 0.0; - d2pvec[0] = 0.0; -#endif -} - -void FreeOrbital::evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) -{ - const PosType& r = P.activeR(iat); - RealType sinkr, coskr; - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - pvec[ik] = ValueType(coskr, sinkr); -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - pvec[j1] = coskr; - pvec[j2] = sinkr; -#endif - } -#ifndef QMC_COMPLEX - pvec[0] = 1.0; -#endif -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) -{ - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - ValueVector d2p(d2phi[i], OrbitalSetSize); - evaluateVGL(P, iat, p, dp, d2p); - } -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) -{ - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - // phi(r) = cos(kr)+i*sin(kr) - phi_of_r = ValueType(coskr, sinkr); - 
p[ik] = phi_of_r; - // i*phi(r) = -sin(kr) + i*cos(kr) - dp[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - } - } -#endif - } -#ifndef QMC_COMPLEX - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; -#endif - } -} - -void FreeOrbital::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) -{ - RealType sinkr, coskr; - ValueType phi_of_r; - for (int iat = first, i = 0; iat < last; iat++, i++) - { - ValueVector p(phi[i], OrbitalSetSize); - GradVector dp(dphi[i], OrbitalSetSize); - HessVector hess(d2phi_mat[i], OrbitalSetSize); - GGGVector ggg(d3phi_mat[i], OrbitalSetSize); - - const PosType& r = P.activeR(iat); - for (int ik = mink; ik < maxk; ik++) - { - sincos(dot(kvecs[ik], r), &sinkr, &coskr); -#ifdef QMC_COMPLEX - const ValueType compi(0, 1); - phi_of_r = ValueType(coskr, sinkr); - p[ik] = phi_of_r; - dp[ik] = compi * phi_of_r * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[ik](la, lb) = 
-phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[ik](lb, la) = hess[ik](la, lb); - } - } - for (int la = 0; la < OHMMS_DIM; la++) - { - ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; - } -#else - const int j2 = 2 * ik; - const int j1 = j2 - 1; - p[j1] = coskr; - p[j2] = sinkr; - dp[j1] = -sinkr * kvecs[ik]; - dp[j2] = coskr * kvecs[ik]; - for (int la = 0; la < OHMMS_DIM; la++) - { - hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; - hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; - for (int lb = la + 1; lb < OHMMS_DIM; lb++) - { - hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; - hess[j1](lb, la) = hess[j1](la, lb); - hess[j2](lb, la) = hess[j2](la, lb); - ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; - ggg[j1][la](la, lb) = ggg[j1][la](lb, la); - ggg[j2][la](la, lb) = ggg[j2][la](lb, la); - ggg[j1][lb](la, la) = ggg[j1][la](lb, la); - ggg[j2][lb](la, la) = ggg[j2][la](lb, la); - ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; - ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); - ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); - ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); - ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); - for (int lc = lb + 1; lc < OHMMS_DIM; lc++) - { - ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; - ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); - ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); - ggg[j1][lb](la, lc) = 
ggg[j1][la](lb, lc); - ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); - ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); - ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); - ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); - ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); - ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); - ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); - } - } - } -#endif - } -#ifndef QMC_COMPLEX - p[0] = 1.0; - dp[0] = 0.0; - hess[0] = 0.0; - ggg[0] = 0.0; -#endif - } -} - -void FreeOrbital::report(const std::string& pad) const -{ - app_log() << pad << "FreeOrbital report" << std::endl; - for (int ik = 0; ik < kvecs.size(); ik++) - { - app_log() << pad << ik << " " << kvecs[ik] << std::endl; - } - app_log() << pad << "end FreeOrbital report" << std::endl; - app_log().flush(); -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h index 0cbb684545..3901e8dd3f 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbital.h @@ -17,59 +17,12 @@ #ifndef QMCPLUSPLUS_FREE_ORBITAL #define QMCPLUSPLUS_FREE_ORBITAL -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalT.h" namespace qmcplusplus { -class FreeOrbital : public SPOSet -{ -public: - FreeOrbital(const std::string& my_name, const std::vector& kpts_cart); - ~FreeOrbital(); - - std::string getClassName() const override { return "FreeOrbital"; } - - // phi[i][j] is phi_j(r_i), i.e. 
electron i in orbital j - // i \in [first, last) - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - ValueMatrix& d2phi) override; - - // plug r_i into all orbitals - void evaluateVGL(const ParticleSet& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) override; - void evaluateValue(const ParticleSet& P, int iat, ValueVector& pvec) override; - - // hessian matrix is needed by backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat) override; - - // derivative of hessian is needed to optimize backflow - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& phi, - GradMatrix& dphi, - HessMatrix& d2phi_mat, - GGGMatrix& d3phi_mat) override; - - void report(const std::string& pad) const override; - // ---- begin required overrides - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - void setOrbitalSetSize(int norbs) override { throw std::runtime_error("not implemented"); } - // required overrides end ---- -private: - const std::vector kvecs; // kvecs vectors - const int mink; // minimum k index - const int maxk; // maximum number of kvecs vectors - std::vector k2neg; // minus kvecs^2 -}; +using FreeOrbital = FreeOrbitalT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h index b193c67c66..ff396122eb 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h @@ -1,21 +1,27 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + #ifndef QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H #define QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H -#include "QMCWaveFunctions/SPOSetBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" namespace qmcplusplus { -class FreeOrbitalBuilder : public SPOSetBuilder -{ -public: - FreeOrbitalBuilder(ParticleSet& els, Communicate* comm, xmlNodePtr cur); - ~FreeOrbitalBuilder() {} - - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -private: - ParticleSet& targetPtcl; - bool in_list(const int j, const std::vector l); -}; -} // namespace qmcplusplus +using FreeOrbitalBuilder = FreeOrbitalBuilderT; +} #endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp similarity index 54% rename from src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp rename to src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp index 5861dc9d0f..df2a916837 100644 --- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.cpp +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp @@ -1,16 +1,34 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. 
+// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + #include "OhmmsData/AttributeSet.h" #include "LongRange/StructFact.h" -#include "LongRange/KContainer.h" -#include "QMCWaveFunctions/ElectronGas/FreeOrbital.h" -#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h" +#include "LongRange/KContainerT.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalT.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" namespace qmcplusplus { -FreeOrbitalBuilder::FreeOrbitalBuilder(ParticleSet& els, Communicate* comm, xmlNodePtr cur) - : SPOSetBuilder("PW", comm), targetPtcl(els) +template +FreeOrbitalBuilderT::FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur) + : SPOSetBuilderT("PW", comm), targetPtcl(els) {} -std::unique_ptr FreeOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> FreeOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) { int norb = -1; std::string spo_object_name; @@ -53,7 +71,7 @@ std::unique_ptr FreeOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) // extract npw k-points from container // kpts_cart is sorted by magnitude std::vector kpts(npw); - KContainer klists; + KContainerT klists; RealType kcut = lattice.LR_kc; // to-do: reduce kcut to >~ kf klists.updateKLists(lattice, 
kcut, lattice.ndim, twist); @@ -82,12 +100,13 @@ std::unique_ptr FreeOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) break; } #endif - auto sposet = std::make_unique(spo_object_name, kpts); + auto sposet = std::make_unique>(spo_object_name, kpts); sposet->report(" "); return sposet; } -bool FreeOrbitalBuilder::in_list(const int j, const std::vector l) +template +bool FreeOrbitalBuilderT::in_list(const int j, const std::vector l) { for (int i = 0; i < l.size(); i++) { @@ -97,4 +116,9 @@ bool FreeOrbitalBuilder::in_list(const int j, const std::vector l) return false; } +template class FreeOrbitalBuilderT; +template class FreeOrbitalBuilderT; +template class FreeOrbitalBuilderT>; +template class FreeOrbitalBuilderT>; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h new file mode 100644 index 0000000000..f408692ea8 --- /dev/null +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h @@ -0,0 +1,41 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_FREE_ORBITAL_BUILDERT_H +#define QMCPLUSPLUS_FREE_ORBITAL_BUILDERT_H + +#include "QMCWaveFunctions/SPOSetBuilderT.h" + +namespace qmcplusplus +{ +template +class FreeOrbitalBuilderT : public SPOSetBuilderT +{ +public: + using RealType = typename SPOSetBuilderT::RealType; + using PosType = typename SPOSetBuilderT::PosType; + + FreeOrbitalBuilderT(ParticleSetT& els, Communicate* comm, xmlNodePtr cur); + ~FreeOrbitalBuilderT() {} + + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + +private: + ParticleSetT& targetPtcl; + bool in_list(const int j, const std::vector l); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp new file mode 100644 index 0000000000..0c3026c88a --- /dev/null +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp @@ -0,0 +1,701 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "FreeOrbitalT.h" + +namespace qmcplusplus +{ + +template +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{} + +template<> +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; +} + +template<> +void FreeOrbitalT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + dpvec[j1] = -sinkr * kvecs[ik]; + dpvec[j2] = coskr * kvecs[ik]; + d2pvec[j1] = k2neg[ik] * coskr; + d2pvec[j2] = k2neg[ik] * sinkr; + } + pvec[0] = 1.0; + dpvec[0] = 0.0; + d2pvec[0] = 0.0; +} + +template<> +void FreeOrbitalT>::evaluateVGL(const ParticleSetT>& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < 
maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } +} + +template<> +void FreeOrbitalT>::evaluateVGL(const ParticleSetT>& P, + int iat, + ValueVector& pvec, + GradVector& dpvec, + ValueVector& d2pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + pvec[ik] = ValueType(coskr, sinkr); + dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik]; + d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr); + } +} + +template<> +void FreeOrbitalT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; +} + +template<> +void FreeOrbitalT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + pvec[j1] = coskr; + pvec[j2] = sinkr; + } + pvec[0] = 1.0; +} + +template<> +void FreeOrbitalT>::evaluateValue(const ParticleSetT>& P, + int iat, + ValueVector& pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + pvec[ik] = std::complex(coskr, sinkr); + } +} + +template<> +void FreeOrbitalT>::evaluateValue(const ParticleSetT>& P, + int iat, + ValueVector& pvec) +{ + const PosType& r = P.activeR(iat); + RealType sinkr, coskr; + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + pvec[ik] = 
std::complex(coskr, sinkr); + } +} + +template +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{ + RealType sinkr, coskr; + float phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + } + } + } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + } +} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{ + RealType sinkr, coskr; + double phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; 
+ p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + } + } + } + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + } +} + +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{ + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } + } + } +} + +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) +{ + RealType sinkr, coskr; + std::complex phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + HessVector hess(d2phi_mat[i], this->OrbitalSetSize); + + const 
PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + + phi_of_r = std::complex(coskr, sinkr); + p[ik] = phi_of_r; + + dp[ik] = std::complex(-sinkr, coskr) * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } + } + } +} + +template +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + 
hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) + { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); + ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } + } + } + } + + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; + } +} + +template<> +void FreeOrbitalT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], 
OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const int j2 = 2 * ik; + const int j1 = j2 - 1; + p[j1] = coskr; + p[j2] = sinkr; + dp[j1] = -sinkr * kvecs[ik]; + dp[j2] = coskr * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la]; + hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[j1](lb, la) = hess[j1](la, lb); + hess[j2](lb, la) = hess[j2](la, lb); + ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la]; + ggg[j1][la](la, lb) = ggg[j1][la](lb, la); + ggg[j2][la](la, lb) = ggg[j2][la](lb, la); + ggg[j1][lb](la, la) = ggg[j1][la](lb, la); + ggg[j2][lb](la, la) = ggg[j2][la](lb, la); + ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb]; + ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb); + ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb); + ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb); + ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb); + for (int lc = lb + 1; lc < OHMMS_DIM; lc++) + { + ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc]; + ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc); + ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc); + ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc); + ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc); + 
ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc); + ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc); + ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc); + ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc); + ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc); + ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc); + } + } + } + } + + p[0] = 1.0; + dp[0] = 0.0; + hess[0] = 0.0; + ggg[0] = 0.0; + } +} + +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } + for (int la = 0; la < OHMMS_DIM; la++) + { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } + } + } +} + +template<> +void FreeOrbitalT>::evaluate_notranspose(const ParticleSetT>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) +{ + RealType sinkr, coskr; + ValueType phi_of_r; + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], OrbitalSetSize); + GradVector dp(dphi[i], OrbitalSetSize); + HessVector hess(d2phi_mat[i], OrbitalSetSize); + GGGVector ggg(d3phi_mat[i], OrbitalSetSize); + + const PosType& r = 
P.activeR(iat); + for (int ik = mink; ik < maxk; ik++) + { + sincos(dot(kvecs[ik], r), &sinkr, &coskr); + const ValueType compi(0, 1); + phi_of_r = ValueType(coskr, sinkr); + p[ik] = phi_of_r; + dp[ik] = compi * phi_of_r * kvecs[ik]; + for (int la = 0; la < OHMMS_DIM; la++) + { + hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la]; + for (int lb = la + 1; lb < OHMMS_DIM; lb++) + { + hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb]; + hess[ik](lb, la) = hess[ik](la, lb); + } + } + for (int la = 0; la < OHMMS_DIM; la++) + { + ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik]; + } + } + } +} + +// generic implementation + +template<class T> +FreeOrbitalT<T>::~FreeOrbitalT() +{} + +template<class T> +void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + ValueMatrix& d2phi) +{ + for (int iat = first, i = 0; iat < last; iat++, i++) + { + ValueVector p(phi[i], this->OrbitalSetSize); + GradVector dp(dphi[i], this->OrbitalSetSize); + ValueVector d2p(d2phi[i], this->OrbitalSetSize); + evaluateVGL(P, iat, p, dp, d2p); + } +} + +// Explicit template specialization +template<> +FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart) + : SPOSetT<float>(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) +{ + this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); +} + +template<> +FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart) + : SPOSetT<double>(my_name), + kvecs(kpts_cart), + mink(1), // treat k=0 as special case + maxk(kpts_cart.size()), + k2neg(maxk) +{ + this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); +} + +template<> +FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string&
my_name, const std::vector<PosType>& kpts_cart) + : SPOSetT<std::complex<float>>(my_name), + kvecs(kpts_cart), + mink(0), // complex orbitals: k=0 is not special, one orbital per k-vector + maxk(kpts_cart.size()), + k2neg(maxk) +{ + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); +} + +template<> +FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart) + : SPOSetT<std::complex<double>>(my_name), + kvecs(kpts_cart), + mink(0), // complex orbitals: k=0 is not special, one orbital per k-vector + maxk(kpts_cart.size()), + k2neg(maxk) +{ + this->OrbitalSetSize = maxk; // SPOSet member + for (int ik = 0; ik < maxk; ik++) + k2neg[ik] = -dot(kvecs[ik], kvecs[ik]); +} + +template<class T> +void FreeOrbitalT<T>::report(const std::string& pad) const +{ + app_log() << pad << "FreeOrbital report" << std::endl; + for (int ik = 0; ik < kvecs.size(); ik++) + { + app_log() << pad << ik << " " << kvecs[ik] << std::endl; + } + app_log() << pad << "end FreeOrbital report" << std::endl; + app_log().flush(); +} + +template class FreeOrbitalT<float>; +template class FreeOrbitalT<double>; +template class FreeOrbitalT<std::complex<float>>; +template class FreeOrbitalT<std::complex<double>>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h new file mode 100644 index 0000000000..0cc825849f --- /dev/null +++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h @@ -0,0 +1,88 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T.
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron +// William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_FREE_ORBITALT_H +#define QMCPLUSPLUS_FREE_ORBITALT_H + +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +class FreeOrbitalT : public SPOSetT +{ +public: + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using ValueType = typename SPOSetT::ValueType; + + FreeOrbitalT(const std::string& my_name, const std::vector& kpts_cart); + ~FreeOrbitalT(); + + inline std::string getClassName() const final { return "FreeOrbital"; } + + // phi[i][j] is phi_j(r_i), i.e. 
electron i in orbital j + // i \in [first, last) + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + ValueMatrix& d2phi) final; + + // plug r_i into all orbitals + void evaluateVGL(const ParticleSetT& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final; + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& pvec) final; + + // hessian matrix is needed by backflow + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat) final; + + // derivative of hessian is needed to optimize backflow + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& phi, + GradMatrix& dphi, + HessMatrix& d2phi_mat, + GGGMatrix& d3phi_mat) override; + + void report(const std::string& pad) const override; + // ---- begin required overrides + std::unique_ptr> makeClone() const final { return std::make_unique>(*this); } + void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); } + // required overrides end ---- +private: + const std::vector kvecs; // kvecs vectors + const int mink; // minimum k index + const int maxk; // maximum number of kvecs vectors + std::vector k2neg; // minus kvecs^2 +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/ExampleHeComponent.h b/src/QMCWaveFunctions/ExampleHeComponent.h index e8478dc44a..3199e33f9d 100644 --- a/src/QMCWaveFunctions/ExampleHeComponent.h +++ b/src/QMCWaveFunctions/ExampleHeComponent.h @@ -32,7 +32,7 @@ class ExampleHeComponent : public WaveFunctionComponent, OptimizableObject my_table_ee_idx_(els.addTable(els, DTModes::NEED_TEMP_DATA_ON_HOST | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), my_table_ei_idx_(els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST)){}; - using OptVariablesType = optimize::VariableSet; + //using OptVariablesType = optimize::VariableSet; using PtclGrpIndexes 
= QMCTraits::PtclGrpIndexes; std::string getClassName() const override { return "ExampleHeComponent"; } diff --git a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h index 5178de3f5c..0fb838f063 100644 --- a/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h +++ b/src/QMCWaveFunctions/Fermion/Backflow_ee_kSpace.h @@ -26,10 +26,6 @@ namespace qmcplusplus class Backflow_ee_kSpace : public BackflowFunctionBase { using ComplexType = QMCTraits::ComplexType; - ///typedef for real values - //using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; public: //number of groups of the target particleset diff --git a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h index e7513eeeae..3e50ea7f0a 100644 --- a/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h +++ b/src/QMCWaveFunctions/Fermion/SlaterDetBuilder.h @@ -19,7 +19,10 @@ #include #include "Configuration.h" #include "WaveFunctionComponentBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include +#include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "QMCWaveFunctions/SPOSetBuilder.h" namespace qmcplusplus { @@ -28,9 +31,6 @@ class BackflowTransformation; class DiracDeterminantBase; class MultiSlaterDetTableMethod; struct CSFData; -class SPOSet; -class SPOSetBuilder; -class SPOSetBuilderFactory; struct ci_configuration; /** derived class from WaveFunctionComponentBuilder diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h index 3503449b7f..cfce7722a2 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.h @@ -14,132 +14,12 @@ #ifndef QMCPLUSPLUS_SHOSET_H #define QMCPLUSPLUS_SHOSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/SPOInfo.h" - +#include "Configuration.h" 
+#include "QMCWaveFunctions/HarmonicOscillator/SHOSetT.h" namespace qmcplusplus { -struct SHOState : public SPOInfo -{ - TinyVector quantum_number; - - SHOState() - { - quantum_number = -1; - energy = 0.0; - } - - ~SHOState() override {} - - inline void set(TinyVector qn, RealType e) - { - quantum_number = qn; - energy = e; - } - - inline void sho_report(const std::string& pad = "") const - { - app_log() << pad << "qn=" << quantum_number << " e=" << energy << std::endl; - } -}; - - -struct SHOSet : public SPOSet -{ - using value_type = ValueMatrix::value_type; - using grad_type = GradMatrix::value_type; - - RealType length; - PosType center; - - int nmax; - TinyVector qn_max; - std::vector state_info; - std::vector prefactors; - Array hermite; - Array bvalues; - Array d0_values; - Array d1_values; - Array d2_values; - - //construction/destruction - SHOSet(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states); - - ~SHOSet() override; - - std::string getClassName() const override { return "SHOSet"; } - - void initialize(); - - //SPOSet interface methods - std::unique_ptr makeClone() const override; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - - //local functions - void evaluate_v(PosType r, ValueVector& psi); - void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - void evaluate_hermite(const PosType& xpos); - void evaluate_d0(const PosType& xpos, ValueVector& psi); - void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); - void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); - void report(const std::string& pad = "") const override; - void
test_derivatives(); - void test_overlap(); - void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); - - //empty methods - /// number of orbitals is determined only by initial request - inline void setOrbitalSetSize(int norbs) override {} - - ///unimplemented functions call this to abort - inline void not_implemented(const std::string& method) - { - APP_ABORT("SHOSet::" + method + " has not been implemented."); - } - - - //methods to be implemented in the future (possibly) - void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& dddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet) override; - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& ddlogdet, - GGGMatrix& dddlogdet) override; - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& dphi, - HessMatrix& ddphi, - GradMatrix& dlapl_phi) override; -}; +using SHOSet = SHOSetT; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h index fc2b75be22..a35851c32a 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.h @@ -14,48 +14,12 @@ #ifndef QMCPLUSPLUS_SHO_BASIS_BUILDER_H #define QMCPLUSPLUS_SHO_BASIS_BUILDER_H -#include "QMCWaveFunctions/HarmonicOscillator/SHOSet.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetInfo.h" +#include "Configuration.h" +#include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h" namespace qmcplusplus { -struct 
SHOSetBuilder : public SPOSetBuilder -{ - //enum{DIM=OHMMS_DIM} - - ParticleSet& Ps; - - RealType length; - RealType mass; - RealType energy; - PosType center; - - int nstates; - int nmax; - TinyVector ind_dims; - - SPOSetInfoSimple basis_states; - - //construction/destruction - SHOSetBuilder(ParticleSet& P, Communicate* comm); - - ~SHOSetBuilder() override; - - //reset parameters - void reset(); - - //SPOSetBuilder interface - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - - std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; - - //local functions - void update_basis_states(int smax); - void report(const std::string& pad = ""); -}; - +using SHOSetBuilder = SHOSetBuilderT; } // namespace qmcplusplus - #endif diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp similarity index 55% rename from src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp rename to src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp index bc3adf1d7a..b0e1f7c477 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilder.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp @@ -10,29 +10,29 @@ // File created by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "SHOSetBuilderT.h" -#include "SHOSetBuilder.h" -#include "QMCWaveFunctions/SPOSetInputInfo.h" #include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/SPOSetInputInfo.h" #include "Utilities/IteratorUtility.h" #include "Utilities/string_utils.h" - namespace qmcplusplus { -SHOSetBuilder::SHOSetBuilder(ParticleSet& P, Communicate* comm) : SPOSetBuilder("SHO", comm), Ps(P) +template +SHOSetBuilderT::SHOSetBuilderT(ParticleSetT& P, Communicate* comm) : SPOSetBuilderT("SHO", comm), Ps(P) { - ClassName = "SHOSetBuilder"; - legacy = false; - app_log() << "Constructing SHOSetBuilder" << std::endl; + this->ClassName = "SHOSetBuilderT"; + this->legacy = false; + app_log() << "Constructing SHOSetBuilderT" << std::endl; reset(); } +template +SHOSetBuilderT::~SHOSetBuilderT() = default; -SHOSetBuilder::~SHOSetBuilder() {} - - -void SHOSetBuilder::reset() +template +void SHOSetBuilderT::reset() { nstates = 0; mass = -1.0; @@ -41,26 +41,23 @@ void SHOSetBuilder::reset() center = 0.0; } - -std::unique_ptr SHOSetBuilder::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> SHOSetBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - APP_ABORT("SHOSetBuilder::createSPOSetFromXML SHOSetBuilder should not use legacy interface"); + APP_ABORT("SHOSetBuilderT::createSPOSetFromXML SHOSetBuilder should not " + "use legacy interface"); - app_log() << "SHOSetBuilder::createSHOSet(xml) " << std::endl; + app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl; SPOSetInputInfo input(cur); return createSPOSet(cur, input); } - -std::unique_ptr SHOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) +template +std::unique_ptr> SHOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) { - app_log() << "SHOSetBuilder::createSHOSet(indices) " << std::endl; - - using std::ceil; - using std::sqrt; - + 
app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl; reset(); // read parameters @@ -83,7 +80,7 @@ std::unique_ptr SHOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputI if (mass < 0.0) mass = 1.0 / (energy * length * length); else if (length < 0.0) - length = 1.0 / sqrt(mass * energy); + length = 1.0 / std::sqrt(mass * energy); // initialize states and/or adjust basis int smax = -1; @@ -91,65 +88,56 @@ std::unique_ptr SHOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputI smax = std::max(smax, input.max_index()); if (input.has_energy_info) { - smax = std::max(smax, (int)ceil(input.max_energy() / energy)); + smax = std::max(smax, (int)std::ceil(input.max_energy() / energy)); } if (smax < 0) - APP_ABORT("SHOSetBuilder::Initialize\n invalid basis size"); + APP_ABORT("SHOSetBuilderT::Initialize\n invalid basis size"); update_basis_states(smax); // create sho state request - indices_t& indices = input.get_indices(states); + indices_t& indices = input.get_indices(this->states); std::vector sho_states; for (int i = 0; i < indices.size(); ++i) sho_states.push_back(basis_states[indices[i]]); // make the sposet - auto sho = std::make_unique(spo_name, length, center, sho_states); + auto sho = std::make_unique>(spo_name, length, center, sho_states); sho->report(" "); - //sho->test_derivatives(); - //sho->test_overlap(); - //APP_ABORT("SHOSetBuilder check"); - return sho; } - -void SHOSetBuilder::update_basis_states(int smax) +template +void SHOSetBuilderT::update_basis_states(int smax) { - using std::ceil; - using std::exp; - using std::log; - using std::sort; - using std::sqrt; - int states_required = smax - basis_states.size() + 1; if (states_required > 0) { RealType N = smax + 1; - if (DIM == 1) + if (QMCTraits::DIM == 1) nmax = smax; - else if (DIM == 2) - nmax = ceil(.5 * sqrt(8. * N + 1.) - 1.5); - else if (DIM == 3) + else if (QMCTraits::DIM == 2) + nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) 
- 1.5); + else if (QMCTraits::DIM == 3) { - RealType f = exp(1.0 / 3.0 * log(81. * N + 3. * sqrt(729. * N * N - 3.))); - nmax = ceil(f / 3. + 1. / f - 2.); + RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.))); + nmax = std::ceil(f / 3. + 1. / f - 2.); } else - APP_ABORT("SHOSetBuilder::update_basis_states dimensions other than 1, 2, or 3 are not supported"); - int ndim = nmax + 1; - ind_dims[DIM - 1] = 1; - for (int d = DIM - 2; d > -1; --d) + APP_ABORT("SHOSetBuilderT::update_basis_states dimensions other " + "than 1, 2, or 3 are not supported"); + int ndim = nmax + 1; + ind_dims[QMCTraits::DIM - 1] = 1; + for (int d = QMCTraits::DIM - 2; d > -1; --d) ind_dims[d] = ind_dims[d + 1] * ndim; int s = 0; - int ntot = pow(ndim, DIM); - TinyVector qnumber; + int ntot = pow(ndim, QMCTraits::DIM); + TinyVector qnumber; for (int m = 0; m < ntot; ++m) { int n = 0; // principal quantum number int nrem = m; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { int i = nrem / ind_dims[d]; nrem -= i * ind_dims[d]; @@ -166,7 +154,7 @@ void SHOSetBuilder::update_basis_states(int smax) st = new SHOState(); basis_states.add(st); } - RealType e = energy * (n + .5 * DIM); + RealType e = energy * (n + .5 * QMCTraits::DIM); st->set(qnumber, e); s++; } @@ -177,27 +165,28 @@ void SHOSetBuilder::update_basis_states(int smax) // reset energy scale even if no states need to be added for (int i = 0; i < basis_states.size(); ++i) { - SHOState& state = *basis_states[i]; - const TinyVector& qnumber = state.quantum_number; - int n = 0; - for (int d = 0; d < DIM; ++d) + SHOState& state = *basis_states[i]; + const TinyVector& qnumber = state.quantum_number; + int n = 0; + for (int d = 0; d < QMCTraits::DIM; ++d) n += qnumber[d]; - state.energy = energy * (n + .5 * DIM); + state.energy = energy * (n + .5 * QMCTraits::DIM); } - //somewhat redundant, but necessary - clear_states(0); - states[0]->finish(basis_states.states); + // somewhat 
redundant, but necessary + this->clear_states(0); + this->states[0]->finish(basis_states.states); if (basis_states.size() <= smax) - APP_ABORT("SHOSetBuilder::update_basis_states failed to make enough states"); + APP_ABORT("SHOSetBuilderT::update_basis_states failed to make enough " + "states"); } - -void SHOSetBuilder::report(const std::string& pad) +template +void SHOSetBuilderT::report(const std::string& pad) { - app_log() << pad << "SHOSetBuilder report" << std::endl; - app_log() << pad << " dimension = " << DIM << std::endl; + app_log() << pad << "SHOSetBuilderT report" << std::endl; + app_log() << pad << " dimension = " << QMCTraits::DIM << std::endl; app_log() << pad << " mass = " << mass << std::endl; app_log() << pad << " frequency = " << energy << std::endl; app_log() << pad << " energy = " << energy << std::endl; @@ -210,8 +199,16 @@ void SHOSetBuilder::report(const std::string& pad) app_log() << pad << " basis_states" << std::endl; for (int s = 0; s < basis_states.size(); ++s) basis_states[s]->report(pad + " " + int2string(s) + " "); - app_log() << pad << "end SHOSetBuilder report" << std::endl; + app_log() << pad << "end SHOSetBuilderT report" << std::endl; app_log().flush(); } +#ifndef QMC_COMPLEX +template class SHOSetBuilderT; +template class SHOSetBuilderT; +#else +template class SHOSetBuilderT>; +template class SHOSetBuilderT>; +#endif + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h new file mode 100644 index 0000000000..238d466b23 --- /dev/null +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h @@ -0,0 +1,62 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H +#define QMCPLUSPLUS_SHO_BASIS_BUILDERT_H + +#include "QMCWaveFunctions/HarmonicOscillator/SHOSetT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" +#include "QMCWaveFunctions/SPOSetInfo.h" + +namespace qmcplusplus +{ +template +class SHOSetBuilderT : public SPOSetBuilderT +{ +public: + using RealType = typename SPOSetT::RealType; + using PosType = typename SPOSetT::PosType; + using indices_t = typename SPOSetBuilderT::indices_t; + + ParticleSetT& Ps; + + RealType length; + RealType mass; + RealType energy; + PosType center; + + int nstates; + int nmax; + TinyVector ind_dims; + + SPOSetInfoSimple basis_states; + + // construction/destruction + SHOSetBuilderT(ParticleSetT& P, Communicate* comm); + + ~SHOSetBuilderT() override; + + // reset parameters + void reset(); + + // SPOSetBuilder interface + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + + std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override; + + // local functions + void update_basis_states(int smax); + void report(const std::string& pad = ""); +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp similarity index 66% rename from src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp rename to src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp index 3a6e5872e1..bb79e6d9e9 100644 --- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSet.cpp +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp @@ -10,14 +10,15 @@ // File created by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "SHOSetT.h" -#include "SHOSet.h" #include "Utilities/string_utils.h" namespace qmcplusplus { -SHOSet::SHOSet(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states) - : SPOSet(my_name), length(l), center(c) +template +SHOSetT::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states) + : SPOSetT(my_name), length(l), center(c) { state_info.resize(sho_states.size()); for (int s = 0; s < sho_states.size(); ++s) @@ -25,30 +26,26 @@ SHOSet::SHOSet(const std::string& my_name, RealType l, PosType c, const std::vec initialize(); } - -void SHOSet::initialize() +template +void SHOSetT::initialize() { using std::sqrt; - OrbitalSetSize = state_info.size(); + this->OrbitalSetSize = state_info.size(); qn_max = -1; for (int s = 0; s < state_info.size(); ++s) - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]); qn_max += 1; nmax = -1; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) nmax = std::max(nmax, qn_max[d]); prefactors.resize(nmax); - hermite.resize(DIM, nmax); - bvalues.resize(DIM, nmax); - - //d0_values.resize(DIM,nmax); - //d1_values.resize(DIM,nmax); - //d2_values.resize(DIM,nmax); + hermite.resize(QMCTraits::DIM, nmax); + bvalues.resize(QMCTraits::DIM, nmax); if (nmax > 0) { @@ -58,14 +55,17 @@ void SHOSet::initialize() } } +template +SHOSetT::~SHOSetT() = default; -SHOSet::~SHOSet() {} - - -std::unique_ptr SHOSet::makeClone() const { return std::make_unique(*this); } - +template +std::unique_ptr> SHOSetT::makeClone() const +{ + return std::make_unique>(*this); +} -void SHOSet::report(const std::string& pad) const +template +void SHOSetT::report(const std::string& pad) const { app_log() << pad << "SHOSet report" << std::endl; app_log() << pad << " 
length = " << length << std::endl; @@ -80,51 +80,51 @@ void SHOSet::report(const std::string& pad) const app_log().flush(); } - -void SHOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void SHOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], size()); + ValueVector p(&psi[0], this->size()); evaluate_v(r, p); } - -void SHOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { const PosType& r(P.activeR(iat)); - ValueVector p(&psi[0], size()); - GradVector dp(&dpsi[0], size()); - ValueVector d2p(&d2psi[0], size()); + ValueVector p(&psi[0], this->size()); + GradVector dp(&dpsi[0], this->size()); + ValueVector d2p(&d2psi[0], this->size()); evaluate_vgl(r, p, dp, d2p); } - -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { for (int iat = first, i = 0; iat < last; ++iat, ++i) { - ValueVector p(logdet[i], size()); - GradVector dp(dlogdet[i], size()); - ValueVector d2p(d2logdet[i], size()); + ValueVector p(logdet[i], this->size()); + GradVector dp(dlogdet[i], this->size()); + ValueVector d2p(d2logdet[i], this->size()); evaluate_vgl(P.R[iat], p, dp, d2p); } } - -void SHOSet::evaluate_v(PosType r, ValueVector& psi) +template +void SHOSetT::evaluate_v(PosType r, ValueVector& psi) { PosType x = (r - center) / length; evaluate_hermite(x); evaluate_d0(x, psi); } - -void SHOSet::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluate_vgl(PosType r, ValueVector& psi, 
GradVector& dpsi, ValueVector& d2psi) { PosType x = (r - center) / length; evaluate_hermite(x); @@ -133,10 +133,10 @@ void SHOSet::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVe evaluate_d2(x, psi, d2psi); } - -void SHOSet::evaluate_hermite(const PosType& xpos) +template +void SHOSetT::evaluate_hermite(const PosType& xpos) { - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { int nh = qn_max[d]; if (nh > 0) @@ -156,11 +156,11 @@ void SHOSet::evaluate_hermite(const PosType& xpos) } } - -void SHOSet::evaluate_d0(const PosType& xpos, ValueVector& psi) +template +void SHOSetT::evaluate_d0(const PosType& xpos, ValueVector& psi) { using std::exp; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { RealType x = xpos[d]; RealType g = exp(-.5 * x * x); @@ -173,17 +173,17 @@ void SHOSet::evaluate_d0(const PosType& xpos, ValueVector& psi) { const SHOState& state = state_info[s]; RealType phi = 1.0; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) phi *= bvalues(d, state.quantum_number[d]); psi[s] = phi; } } - -void SHOSet::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) +template +void SHOSetT::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi) { RealType ol = 1.0 / length; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { RealType x = xpos[d]; RealType Hnm1 = 0.0; @@ -197,19 +197,19 @@ void SHOSet::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi for (int s = 0; s < state_info.size(); ++s) { const SHOState& state = state_info[s]; - TinyVector dphi; - for (int d = 0; d < DIM; ++d) + TinyVector dphi; + for (int d = 0; d < QMCTraits::DIM; ++d) dphi[d] = bvalues(d, state.quantum_number[d]); dphi *= psi[s]; dpsi[s] = dphi; } } - -void SHOSet::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi) +template +void SHOSetT::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& 
d2psi) { RealType ol2 = 1.0 / (length * length); - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { RealType x = xpos[d]; RealType x2 = x * x; @@ -221,16 +221,16 @@ void SHOSet::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2p for (int s = 0; s < state_info.size(); ++s) { const SHOState& state = state_info[s]; - ValueType d2phi = 0.0; - for (int d = 0; d < DIM; ++d) + T d2phi = 0.0; + for (int d = 0; d < QMCTraits::DIM; ++d) d2phi += bvalues(d, state.quantum_number[d]); d2phi *= psi[s]; d2psi[s] = d2phi; } } - -void SHOSet::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SHOSetT::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) { using std::exp; using std::sqrt; @@ -245,7 +245,7 @@ void SHOSet::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, Value for (int n = 1; n < N; ++n) pre[n] = pre[n - 1] / sqrt(2. * n); - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { RealType x = (r[d] - center[d]) / length; RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x; @@ -302,8 +302,8 @@ void SHOSet::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, Value } } - -void SHOSet::test_derivatives() +template +void SHOSetT::test_derivatives() { int n = 3; PosType c = 5.123; @@ -318,7 +318,6 @@ void SHOSet::test_derivatives() GradVector vdpsi, vdpsin; ValueVector vd2psi, vd2psin; - vpsi.resize(nphi); vdpsi.resize(nphi); vd2psi.resize(nphi); @@ -327,20 +326,18 @@ void SHOSet::test_derivatives() vdpsin.resize(nphi); vd2psin.resize(nphi); + ValueVector psi(&vpsi[0], this->size()); + GradVector dpsi(&vdpsi[0], this->size()); + ValueVector d2psi(&vd2psi[0], this->size()); - ValueVector psi(&vpsi[0], size()); - GradVector dpsi(&vdpsi[0], size()); - ValueVector d2psi(&vd2psi[0], size()); - - ValueVector psitmp(&vpsitmp[0], size()); - GradVector dpsin(&vdpsin[0], size()); - ValueVector 
d2psin(&vd2psin[0], size()); - + ValueVector psitmp(&vpsitmp[0], this->size()); + GradVector dpsin(&vdpsin[0], this->size()); + ValueVector d2psin(&vd2psin[0], this->size()); app_log() << " loading dr" << std::endl; RealType odr2sum = 0.0; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { RealType odr = 1.0 / dr[d]; o2dr[d] = .5 * odr; @@ -350,8 +347,8 @@ void SHOSet::test_derivatives() app_log() << "SHOSet::test_derivatives" << std::endl; - const SimulationCell simulation_cell; - ParticleSet Ps(simulation_cell); + const SimulationCellT simulation_cell; + ParticleSetT Ps(simulation_cell); int p = 0; PosType r, rtmp; @@ -365,22 +362,19 @@ void SHOSet::test_derivatives() { r[2] = c[2] + k * drg[2]; - //evaluate_check(r,psi,dpsi,d2psi); - //APP_ABORT("SHOSet eval check"); - evaluate_vgl(r, psi, dpsi, d2psi); for (int m = 0; m < nphi; ++m) d2psin[m] = -2 * odr2sum * psi[m]; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) { rtmp = r; rtmp[d] += dr[d]; evaluate_v(rtmp, psitmp); for (int m = 0; m < nphi; ++m) { - ValueType phi = psitmp[m]; - dpsin[m][d] = phi * o2dr[d]; + T phi = psitmp[m]; + dpsin[m][d] = phi * o2dr[d]; d2psin[m] += phi * odr2[d]; } rtmp = r; @@ -388,7 +382,7 @@ void SHOSet::test_derivatives() evaluate_v(rtmp, psitmp); for (int m = 0; m < nphi; ++m) { - ValueType phi = psitmp[m]; + T phi = psitmp[m]; dpsin[m][d] -= phi * o2dr[d]; d2psin[m] += phi * odr2[d]; } @@ -397,7 +391,7 @@ void SHOSet::test_derivatives() RealType dphi_diff = 0.0; RealType d2phi_diff = 0.0; for (int m = 0; m < nphi; ++m) - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) dphi_diff = std::max(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d])); for (int m = 0; m < nphi; ++m) d2phi_diff = std::max(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m])); @@ -406,14 +400,14 @@ void SHOSet::test_derivatives() for (int m = 0; m < nphi; ++m) { std::string qn = ""; - for (int d = 0; d 
< DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) qn += int2string(state_info[m].quantum_number[d]) + " "; app_log() << " " << qn; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) app_log() << real(dpsi[m][d]) << " "; app_log() << std::endl; app_log() << " " << qn; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) app_log() << real(dpsin[m][d]) << " "; app_log() << std::endl; } @@ -422,7 +416,7 @@ void SHOSet::test_derivatives() for (int m = 0; m < nphi; ++m) { std::string qn = ""; - for (int d = 0; d < DIM; ++d) + for (int d = 0; d < QMCTraits::DIM; ++d) qn += int2string(state_info[m].quantum_number[d]) + " "; app_log() << " " << qn << real(d2psi[m] / psi[m]) << std::endl; app_log() << " " << qn << real(d2psin[m] / psi[m]) << std::endl; @@ -435,13 +429,12 @@ void SHOSet::test_derivatives() app_log() << "end SHOSet::test_derivatives" << std::endl; } - -void SHOSet::test_overlap() +template +void SHOSetT::test_overlap() { app_log() << "SHOSet::test_overlap" << std::endl; - - //linear + // linear int d = 0; app_log() << " length = " << length << std::endl; @@ -452,8 +445,8 @@ void SHOSet::test_overlap() app_log() << " 1d overlap" << std::endl; ValueVector vpsi; - vpsi.resize(size()); - ValueVector psi(&vpsi[0], size()); + vpsi.resize(this->size()); + ValueVector psi(&vpsi[0], this->size()); double xmax = 4.0; double dx = .1; @@ -486,11 +479,10 @@ void SHOSet::test_overlap() } app_log() << std::endl; - - //volumetric + // volumetric app_log() << " 3d overlap" << std::endl; double dV = dr * dr * dr; - nphi = size(); + nphi = this->size(); omat.resize(nphi, nphi); for (int i = 0; i < nphi; ++i) for (int j = 0; j < nphi; ++j) @@ -517,57 +509,69 @@ void SHOSet::test_overlap() } app_log() << std::endl; - app_log() << "end SHOSet::test_overlap" << std::endl; } - -void SHOSet::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) +template +void SHOSetT::evaluateThirdDeriv(const 
ParticleSetT& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) { not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)"); } -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) { not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)"); } -void SHOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) +template +void SHOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) { not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)"); } -void SHOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +template +void SHOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) { not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)"); } -void SHOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) +template +void SHOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) { not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)"); } +// Class concrete types from ValueType +#ifndef 
QMC_COMPLEX +template class SHOSetT; +template class SHOSetT; +#else +template class SHOSetT>; +template class SHOSetT>; +#endif + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h new file mode 100644 index 0000000000..b0e176633f --- /dev/null +++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h @@ -0,0 +1,154 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SHOSETT_H +#define QMCPLUSPLUS_SHOSETT_H + +#include "QMCWaveFunctions/SPOInfo.h" +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +struct SHOState : public SPOInfo +{ + TinyVector quantum_number; + + SHOState() + { + quantum_number = -1; + energy = 0.0; + } + + ~SHOState() override {} + + inline void set(TinyVector qn, RealType e) + { + quantum_number = qn; + energy = e; + } + + inline void sho_report(const std::string& pad = "") const + { + app_log() << pad << "qn=" << quantum_number << " e=" << energy << std::endl; + } +}; + +template +class SHOSetT : public SPOSetT +{ +public: + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using value_type = typename ValueMatrix::value_type; + using grad_type = typename GradMatrix::value_type; + using 
RealType = typename SPOSetT::RealType; + using PosType = TinyVector; + using HessType = typename OrbitalSetTraits::HessType; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGType = TinyVector; + using GGGVector = Vector; + using GGGMatrix = Matrix; + + RealType length; + PosType center; + + int nmax; + TinyVector qn_max; + std::vector state_info; + std::vector prefactors; + Array hermite; + Array bvalues; + Array d0_values; + Array d1_values; + Array d2_values; + + // construction/destruction + SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector& sho_states); + + ~SHOSetT() override; + + std::string getClassName() const override { return "SHOSet"; } + + void initialize(); + + // SPOSet interface methods + std::unique_ptr> makeClone() const override; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + // local functions + void evaluate_v(PosType r, ValueVector& psi); + void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + void evaluate_hermite(const PosType& xpos); + void evaluate_d0(const PosType& xpos, ValueVector& psi); + void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi); + void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi); + void report(const std::string& pad = "") const override; + void test_derivatives(); + void test_overlap(); + void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + // empty methods + /// number of orbitals is determined only by initial request + inline void setOrbitalSetSize(int norbs) override {} + + /// unimplemented functions call this to abort + inline void 
not_implemented(const std::string& method) + { + APP_ABORT("SHOSet::" + method + " has not been implemented."); + } + + // methods to be implemented in the future (possibly) + void evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& dddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet) override; + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& ddlogdet, + GGGMatrix& dddlogdet) override; + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override; + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& dphi, + HessMatrix& ddphi, + GradMatrix& dlapl_phi) override; +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h index b8b99e451b..3ac5ba0dea 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussian.h +++ b/src/QMCWaveFunctions/Jastrow/CountingGaussian.h @@ -14,6 +14,8 @@ #include "OhmmsData/AttributeSet.h" #include "VariableSet.h" +#include "QMCWaveFunctions/OptimizableObject.h" + #include namespace qmcplusplus @@ -28,7 +30,6 @@ class CountingGaussian using TensorType = QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // enumerations for axis parameters enum A_vars diff --git a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h index 3543b80270..2a46a3f76d 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h +++ b/src/QMCWaveFunctions/Jastrow/CountingGaussianRegion.h @@ -30,7 +30,6 @@ class CountingGaussianRegion using TensorType = 
QMCTraits::TensorType; using real_type = optimize::VariableSet::real_type; - using opt_variables_type = optimize::VariableSet; // counting function pointers std::vector> C; diff --git a/src/QMCWaveFunctions/Jastrow/CountingJastrow.h b/src/QMCWaveFunctions/Jastrow/CountingJastrow.h index eb0e10b867..178d23d8c8 100644 --- a/src/QMCWaveFunctions/Jastrow/CountingJastrow.h +++ b/src/QMCWaveFunctions/Jastrow/CountingJastrow.h @@ -16,6 +16,7 @@ #include "Particle/ParticleSet.h" #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "QMCWaveFunctions/Jastrow/CountingGaussianRegion.h" +#include "QMCWaveFunctions/OptimizableObject.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h index 2d5942e745..f4e28f31f8 100644 --- a/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h +++ b/src/QMCWaveFunctions/Jastrow/eeI_JastrowBuilder.h @@ -14,11 +14,12 @@ #ifndef QMCPLUSPLUS_EEI_JASTROW_BUILDER_H #define QMCPLUSPLUS_EEI_JASTROW_BUILDER_H #include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "ParticleSet.h" namespace qmcplusplus { //forward declaration -class ParticleSet; + class eeI_JastrowBuilder : public WaveFunctionComponentBuilder { diff --git a/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h b/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h index 4c65edbedc..7c00d15a3d 100644 --- a/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h +++ b/src/QMCWaveFunctions/Jastrow/kSpaceJastrowBuilder.h @@ -16,12 +16,10 @@ #define QMCPLUSPLUS_KSPACE_JASTROW_BUILDER_H #include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" #include "QMCWaveFunctions/Jastrow/kSpaceJastrow.h" +#include "Particle/ParticleSet.h" namespace qmcplusplus { -//forward declaration -class ParticleSet; - class kSpaceJastrowBuilder : public WaveFunctionComponentBuilder { public: diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h index 
3ca6015545..eb708e7794 100644 --- a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilder.h @@ -17,59 +17,11 @@ #ifndef QMCPLUSPLUS_ATOMICORBITALBUILDER_H #define QMCPLUSPLUS_ATOMICORBITALBUILDER_H - -#include "Message/MPIObjectBase.h" -#include "hdf/hdf_archive.h" -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/AOBasisBuilderT.h" namespace qmcplusplus { -/** atomic basisset builder - * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet - * - * Reimplement AtomiSPOSetBuilder.h - */ -template -class AOBasisBuilder : public MPIObjectBase -{ -public: - enum - { - DONOT_EXPAND = 0, - GAUSSIAN_EXPAND = 1, - NATURAL_EXPAND, - CARTESIAN_EXPAND, - MOD_NATURAL_EXPAND, - DIRAC_CARTESIAN_EXPAND - }; - -private: - bool addsignforM; - int expandlm; - std::string Morder; - std::string sph; - std::string basisType; - std::string elementType; - std::string Normalized; - - ///map for the radial orbitals - std::map RnlID; - - ///map for (n,l,m,s) to its quantum number index - std::map nlms_id; - -public: - AOBasisBuilder(const std::string& eName, Communicate* comm); - - bool put(xmlNodePtr cur); - bool putH5(hdf_archive& hin); - - SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; } - - std::unique_ptr createAOSet(xmlNodePtr cur); - std::unique_ptr createAOSetH5(hdf_archive& hin); - - int expandYlm(COT* aos, std::vector& all_nl, int expandlm = DONOT_EXPAND); -}; +using AOBasisBuilder = AOBasisBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp similarity index 65% rename from src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp rename to src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp index 1e5d701eac..35ded0588b 100644 --- a/src/QMCWaveFunctions/LCAO/AOBasisBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp @@ -13,21 +13,21 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, 
University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "AOBasisBuilderT.h" -#include "AOBasisBuilder.h" -#include "Utilities/ProgressReportEngine.h" -#include "OhmmsData/AttributeSet.h" -#include "RadialOrbitalSetBuilder.h" -#include "SoaAtomicBasisSet.h" -#include "MultiQuinticSpline1D.h" #include "MultiFunctorAdapter.h" +#include "MultiQuinticSpline1D.h" #include "Numerics/SoaCartesianTensor.h" #include "Numerics/SoaSphericalTensor.h" +#include "OhmmsData/AttributeSet.h" +#include "RadialOrbitalSetBuilder.h" +#include "SoaAtomicBasisSetT.h" +#include "Utilities/ProgressReportEngine.h" namespace qmcplusplus { template -AOBasisBuilder::AOBasisBuilder(const std::string& eName, Communicate* comm) +AOBasisBuilderT::AOBasisBuilderT(const std::string& eName, Communicate* comm) : MPIObjectBase(comm), addsignforM(false), expandlm(GAUSSIAN_EXPAND), @@ -38,7 +38,8 @@ AOBasisBuilder::AOBasisBuilder(const std::string& eName, Communicate* comm) Normalized("yes") { // mmorales: for "Cartesian Gaussian", m is an integer that maps - // the component to Gamess notation, see Numerics/CartesianTensor.h + // the component to Gamess notation, see + // Numerics/CartesianTensor.h nlms_id["n"] = q_n; nlms_id["l"] = q_l; nlms_id["m"] = q_m; @@ -46,10 +47,10 @@ AOBasisBuilder::AOBasisBuilder(const std::string& eName, Communicate* comm) } template -bool AOBasisBuilder::put(xmlNodePtr cur) +bool AOBasisBuilderT::put(xmlNodePtr cur) { ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)"); - //Register valid attributes attributes + // Register valid attributes attributes OhmmsAttributeSet aAttrib; aAttrib.add(basisType, "type"); aAttrib.add(sph, "angular"); @@ -59,7 +60,7 @@ bool AOBasisBuilder::put(xmlNodePtr cur) aAttrib.put(cur); PRE.echo(cur); if (sph == "spherical") - addsignforM = 1; //include (-1)^m + addsignforM = 1; // include (-1)^m if (Morder == "gaussian") expandlm = GAUSSIAN_EXPAND; @@ -73,7 
+74,8 @@ bool AOBasisBuilder::put(xmlNodePtr cur) addsignforM = 1; if (sph != "spherical") { - myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with angular='spherical'. Aborting.\n"); + myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. Aborting.\n"); } } @@ -88,7 +90,8 @@ bool AOBasisBuilder::put(xmlNodePtr cur) expandlm = DIRAC_CARTESIAN_EXPAND; addsignforM = 0; if (sph != "cartesian") - myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with angular='cartesian'. Aborting\n"); + myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. Aborting\n"); } // Numerical basis is a special case @@ -99,7 +102,7 @@ bool AOBasisBuilder::put(xmlNodePtr cur) } template -bool AOBasisBuilder::putH5(hdf_archive& hin) +bool AOBasisBuilderT::putH5(hdf_archive& hin) { ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)"); std::string CenterID, basisName; @@ -122,7 +125,7 @@ bool AOBasisBuilder::putH5(hdf_archive& hin) myComm->bcast(addsignforM); if (sph == "spherical") - addsignforM = 1; //include (-1)^m + addsignforM = 1; // include (-1)^m if (Morder == "gaussian") expandlm = GAUSSIAN_EXPAND; @@ -136,7 +139,8 @@ bool AOBasisBuilder::putH5(hdf_archive& hin) addsignforM = 1; if (sph != "spherical") { - myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with angular='spherical'. Aborting.\n"); + myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with " + "angular='spherical'. Aborting.\n"); } } @@ -151,7 +155,8 @@ bool AOBasisBuilder::putH5(hdf_archive& hin) expandlm = DIRAC_CARTESIAN_EXPAND; addsignforM = 0; if (sph != "cartesian") - myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with angular='cartesian'. Aborting\n"); + myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with " + "angular='cartesian'. 
Aborting\n"); } app_log() << R"(::putH5(hdf_archive& hin) return true; } - template -std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) +std::unique_ptr AOBasisBuilderT::createAOSet(xmlNodePtr cur) { ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)"); app_log() << " AO BasisSet for " << elementType << "\n"; @@ -184,13 +188,19 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) app_log() << " Angular momentum m expanded as -l, ... ,l" << std::endl; break; case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l, with the exception of L=1 (1,-1,0)" << std::endl; + app_log() << " Angular momentum m expanded as -l, ... ,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; break; case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx y^ly z^lz according to Gamess" << std::endl; + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; break; case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in DIRAC ordering" << std::endl; + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; break; default: app_log() << " Angular momentum m is explicitly given." 
<< std::endl; @@ -198,9 +208,10 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) QuantumNumberType nlms; std::string rnl; - int Lmax(0); //maxmimum angular momentum of this center - int num(0); //the number of localized basis functions of this center - //process the basic property: maximun angular momentum, the number of basis functions to be added + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center + // process the basic property: maximun angular momentum, the number of basis + // functions to be added std::vector radGroup; xmlNodePtr cur1 = cur->xmlChildrenNode; xmlNodePtr gptr = 0; @@ -212,7 +223,7 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) radGroup.push_back(cur1); const int l = std::stoi(getXMLAttributeValue(cur1, "l")); Lmax = std::max(Lmax, l); - //expect that only Rnl is given + // expect that only Rnl is given if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) num += (l + 1) * (l + 2) / 2; else if (expandlm) @@ -227,16 +238,16 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) cur1 = cur1->next; } - //create a new set of atomic orbitals sharing a center with (Lmax, num) - //if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) auto aos = std::make_unique(Lmax, addsignforM); aos->LM.resize(num); aos->NL.resize(num); - //Now, add distinct Radial Orbitals and (l,m) channels + // Now, add distinct Radial Orbitals and (l,m) channels RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGrid(gptr, basisType); //assign a radial grid for the new center + radFuncBuilder.addGrid(gptr, basisType); // assign a radial grid for the new center std::vector::iterator it(radGroup.begin()); std::vector::iterator 
it_end(radGroup.end()); std::vector all_nl; @@ -248,7 +259,7 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) { std::string aname((const char*)(att->name)); if (aname == "rid" || aname == "id") - //accept id/rid + // accept id/rid { rnl = (const char*)(att->children->content); } @@ -256,14 +267,14 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) { std::map::iterator iit = nlms_id.find(aname); if (iit != nlms_id.end()) - //valid for n,l,m,s + // valid for n,l,m,s { nlms[(*iit).second] = atoi((const char*)(att->children->content)); } } att = att->next; } - //add Ylm channels + // add Ylm channels app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; std::map::iterator rnl_it = RnlID.find(rnl); if (rnl_it == RnlID.end()) @@ -290,11 +301,10 @@ std::unique_ptr AOBasisBuilder::createAOSet(xmlNodePtr cur) return aos; } - template -std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) +std::unique_ptr AOBasisBuilderT::createAOSetH5(hdf_archive& hin) { - ReportEngine PRE("AOBasisBuilder:", "createAOSetH5(std::string)"); + ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)"); app_log() << " AO BasisSet for " << elementType << "\n"; if (expandlm != CARTESIAN_EXPAND) @@ -314,13 +324,19 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) app_log() << " Angular momentum m expanded as -l, ... ,l" << std::endl; break; case (MOD_NATURAL_EXPAND): - app_log() << " Angular momentum m expanded as -l, ... ,l, with the exception of L=1 (1,-1,0)" << std::endl; + app_log() << " Angular momentum m expanded as -l, ... 
,l, with the " + "exception of L=1 (1,-1,0)" + << std::endl; break; case (CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions x^lx y^ly z^lz according to Gamess" << std::endl; + app_log() << " Angular momentum expanded in cartesian functions x^lx " + "y^ly z^lz according to Gamess" + << std::endl; break; case (DIRAC_CARTESIAN_EXPAND): - app_log() << " Angular momentum expanded in cartesian functions in DIRAC ordering" << std::endl; + app_log() << " Angular momentum expanded in cartesian functions in " + "DIRAC ordering" + << std::endl; break; default: app_log() << " Angular momentum m is explicitly given." << std::endl; @@ -328,8 +344,8 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) QuantumNumberType nlms; std::string rnl; - int Lmax(0); //maxmimum angular momentum of this center - int num(0); //the number of localized basis functions of this center + int Lmax(0); // maxmimum angular momentum of this center + int num(0); // the number of localized basis functions of this center int numbasisgroups(0); if (myComm->rank() == 0) @@ -352,7 +368,7 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) myComm->bcast(l); Lmax = std::max(Lmax, l); - //expect that only Rnl is given + // expect that only Rnl is given if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND) num += (l + 1) * (l + 2) / 2; else if (expandlm) @@ -361,16 +377,16 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) num++; } - //create a new set of atomic orbitals sharing a center with (Lmax, num) - //if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) + // create a new set of atomic orbitals sharing a center with (Lmax, num) + // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm) auto aos = std::make_unique(Lmax, addsignforM); aos->LM.resize(num); aos->NL.resize(num); - //Now, add distinct Radial Orbitals and (l,m) channels + // Now, add distinct Radial Orbitals and (l,m) channels 
RadialOrbitalSetBuilder radFuncBuilder(myComm, *aos); radFuncBuilder.Normalized = (Normalized == "yes"); - radFuncBuilder.addGridH5(hin); //assign a radial grid for the new center + radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center std::vector all_nl; for (int i = 0; i < numbasisgroups; i++) { @@ -386,7 +402,7 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) myComm->bcast(nlms[0]); myComm->bcast(nlms[1]); - //add Ylm channels + // add Ylm channels app_log() << " R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl; std::map::iterator rnl_it = RnlID.find(rnl); if (rnl_it == RnlID.end()) @@ -415,9 +431,8 @@ std::unique_ptr AOBasisBuilder::createAOSetH5(hdf_archive& hin) return aos; } - template -int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expandlm) +int AOBasisBuilderT::expandYlm(COT* aos, std::vector& all_nl, int expandlm) { int num = 0; if (expandlm == GAUSSIAN_EXPAND) @@ -434,7 +449,7 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan aos->NL[num] = nl; num++; break; - case (1): //px(1),py(-1),pz(0) + case (1): // px(1),py(-1),pz(0) aos->LM[num] = aos->Ylm.index(1, 1); aos->NL[num] = nl; num++; @@ -445,7 +460,7 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan aos->NL[num] = nl; num++; break; - default: //0,1,-1,2,-2,...,l,-l + default: // 0,1,-1,2,-2,...,l,-l aos->LM[num] = aos->Ylm.index(l, 0); aos->NL[num] = nl; num++; @@ -471,7 +486,7 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan app_log() << " Adding " << 2 * l + 1 << " spherical orbitals" << std::endl; if (l == 1) { - //px(1),py(-1),pz(0) + // px(1),py(-1),pz(0) aos->LM[num] = aos->Ylm.index(1, 1); aos->NL[num] = nl; num++; @@ -508,7 +523,9 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan } else if (expandlm == CARTESIAN_EXPAND) { - app_log() << "Expanding Ylm (angular function) according to Gamess using 
cartesian gaussians" << std::endl; + app_log() << "Expanding Ylm (angular function) according to Gamess " + "using cartesian gaussians" + << std::endl; for (int nl = 0; nl < aos->RnlID.size(); nl++) { int l = aos->RnlID[nl][q_l]; @@ -526,7 +543,9 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan } else if (expandlm == DIRAC_CARTESIAN_EXPAND) { - app_log() << "Expanding Ylm (angular function) according to DIRAC using cartesian gaussians" << std::endl; + app_log() << "Expanding Ylm (angular function) according to DIRAC " + "using cartesian gaussians" + << std::endl; for (int nl = 0; nl < aos->RnlID.size(); nl++) { int l = aos->RnlID[nl][q_l]; @@ -553,252 +572,252 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan num++; break; case (2): - aos->LM[num] = nbefore + 0; //xx + aos->LM[num] = nbefore + 0; // xx aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 3; //xy + aos->LM[num] = nbefore + 3; // xy aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 4; //xz + aos->LM[num] = nbefore + 4; // xz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 1; //yy + aos->LM[num] = nbefore + 1; // yy aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 5; //yz + aos->LM[num] = nbefore + 5; // yz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 2; //zz + aos->LM[num] = nbefore + 2; // zz aos->NL[num] = nl; num++; break; case (3): - aos->LM[num] = nbefore + 0; //xxx + aos->LM[num] = nbefore + 0; // xxx aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 3; //xxy + aos->LM[num] = nbefore + 3; // xxy aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 4; //xxz + aos->LM[num] = nbefore + 4; // xxz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 5; //xyy + aos->LM[num] = nbefore + 5; // xyy aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 9; //xyz + aos->LM[num] = nbefore + 9; // xyz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 7; //xzz + aos->LM[num] = nbefore + 7; // xzz aos->NL[num] = nl; num++; - aos->LM[num] 
= nbefore + 1; //yyy + aos->LM[num] = nbefore + 1; // yyy aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 6; //yyz + aos->LM[num] = nbefore + 6; // yyz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 8; //yzz + aos->LM[num] = nbefore + 8; // yzz aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 2; //zzz + aos->LM[num] = nbefore + 2; // zzz aos->NL[num] = nl; num++; break; case (4): - aos->LM[num] = nbefore + 0; //400 + aos->LM[num] = nbefore + 0; // 400 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 3; //310 + aos->LM[num] = nbefore + 3; // 310 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 4; //301 + aos->LM[num] = nbefore + 4; // 301 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 9; //220 + aos->LM[num] = nbefore + 9; // 220 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 12; //211 + aos->LM[num] = nbefore + 12; // 211 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 10; //202 + aos->LM[num] = nbefore + 10; // 202 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 5; //130 + aos->LM[num] = nbefore + 5; // 130 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 13; //121 + aos->LM[num] = nbefore + 13; // 121 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 14; //112 + aos->LM[num] = nbefore + 14; // 112 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 7; //103 + aos->LM[num] = nbefore + 7; // 103 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 1; //040 + aos->LM[num] = nbefore + 1; // 040 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 6; //031 + aos->LM[num] = nbefore + 6; // 031 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 11; //022 + aos->LM[num] = nbefore + 11; // 022 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 8; //013 + aos->LM[num] = nbefore + 8; // 013 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 2; //004 + aos->LM[num] = nbefore + 2; // 004 aos->NL[num] = nl; num++; break; case (5): - aos->LM[num] = nbefore + 0; //500 + aos->LM[num] = nbefore + 0; // 500 aos->NL[num] = 
nl; num++; - aos->LM[num] = nbefore + 3; //410 + aos->LM[num] = nbefore + 3; // 410 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 4; //401 + aos->LM[num] = nbefore + 4; // 401 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 9; //320 + aos->LM[num] = nbefore + 9; // 320 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 15; //311 + aos->LM[num] = nbefore + 15; // 311 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 10; //302 + aos->LM[num] = nbefore + 10; // 302 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 11; //230 + aos->LM[num] = nbefore + 11; // 230 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 18; //221 + aos->LM[num] = nbefore + 18; // 221 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 19; //212 + aos->LM[num] = nbefore + 19; // 212 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 13; //203 + aos->LM[num] = nbefore + 13; // 203 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 5; //140 + aos->LM[num] = nbefore + 5; // 140 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 16; //131 + aos->LM[num] = nbefore + 16; // 131 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 20; //122 + aos->LM[num] = nbefore + 20; // 122 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 17; //113 + aos->LM[num] = nbefore + 17; // 113 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 7; //104 + aos->LM[num] = nbefore + 7; // 104 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 1; //050 + aos->LM[num] = nbefore + 1; // 050 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 6; //041 + aos->LM[num] = nbefore + 6; // 041 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 12; //032 + aos->LM[num] = nbefore + 12; // 032 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 14; //023 + aos->LM[num] = nbefore + 14; // 023 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 8; //014 + aos->LM[num] = nbefore + 8; // 014 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 2; //005 + aos->LM[num] = nbefore + 2; // 005 aos->NL[num] 
= nl; num++; break; case (6): - aos->LM[num] = nbefore + 0; //600 + aos->LM[num] = nbefore + 0; // 600 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 3; //510 + aos->LM[num] = nbefore + 3; // 510 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 4; //501 + aos->LM[num] = nbefore + 4; // 501 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 9; //420 + aos->LM[num] = nbefore + 9; // 420 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 15; //411 + aos->LM[num] = nbefore + 15; // 411 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 10; //402 + aos->LM[num] = nbefore + 10; // 402 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 18; //330 + aos->LM[num] = nbefore + 18; // 330 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 21; //321 + aos->LM[num] = nbefore + 21; // 321 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 22; //312 + aos->LM[num] = nbefore + 22; // 312 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 19; //303 + aos->LM[num] = nbefore + 19; // 303 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 11; //240 + aos->LM[num] = nbefore + 11; // 240 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 23; //231 + aos->LM[num] = nbefore + 23; // 231 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 27; //222 + aos->LM[num] = nbefore + 27; // 222 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 25; //213 + aos->LM[num] = nbefore + 25; // 213 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 13; //204 + aos->LM[num] = nbefore + 13; // 204 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 5; //150 + aos->LM[num] = nbefore + 5; // 150 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 16; //141 + aos->LM[num] = nbefore + 16; // 141 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 24; //132 + aos->LM[num] = nbefore + 24; // 132 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 26; //123 + aos->LM[num] = nbefore + 26; // 123 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 17; //114 + aos->LM[num] = 
nbefore + 17; // 114 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 7; //105 + aos->LM[num] = nbefore + 7; // 105 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 1; //060 + aos->LM[num] = nbefore + 1; // 060 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 6; //051 + aos->LM[num] = nbefore + 6; // 051 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 12; //042 + aos->LM[num] = nbefore + 12; // 042 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 20; //033 + aos->LM[num] = nbefore + 20; // 033 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 14; //024 + aos->LM[num] = nbefore + 14; // 024 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 8; //015 + aos->LM[num] = nbefore + 8; // 015 aos->NL[num] = nl; num++; - aos->LM[num] = nbefore + 2; //006 + aos->LM[num] = nbefore + 2; // 006 aos->NL[num] = nl; num++; break; @@ -815,28 +834,65 @@ int AOBasisBuilder::expandYlm(COT* aos, std::vector& all_nl, int expan int nl = all_nl[ind]; int l = aos->RnlID[nl][q_l]; int m = aos->RnlID[nl][q_m]; - //assign the index for real Spherical Harmonic with (l,m) + // assign the index for real Spherical Harmonic with (l,m) aos->LM[num] = aos->Ylm.index(l, m); - //assign the index for radial orbital with (n,l) + // assign the index for radial orbital with (n,l) aos->NL[num] = nl; - //increment number of basis functions + // increment number of basis functions num++; } } return num; } -template class AOBasisBuilder< - SoaAtomicBasisSet, SoaCartesianTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet, SoaSphericalTensor>>; -template class AOBasisBuilder>, - SoaCartesianTensor>>; -template class AOBasisBuilder>, - SoaSphericalTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet>, SoaCartesianTensor>>; -template class AOBasisBuilder< - SoaAtomicBasisSet>, SoaSphericalTensor>>; +template class AOBasisBuilderT, SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>>; +template class 
AOBasisBuilderT, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT, SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT, SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT, SoaSphericalTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaCartesianTensor, std::complex>>; + +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, double>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, float>>; +template class AOBasisBuilderT< + SoaAtomicBasisSetT>, SoaSphericalTensor, std::complex>>; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h 
b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h new file mode 100644 index 0000000000..144b2b4dc9 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h @@ -0,0 +1,75 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_ATOMICORBITALBUILDERT_H +#define QMCPLUSPLUS_ATOMICORBITALBUILDERT_H + + +#include "Message/MPIObjectBase.h" +#include "hdf/hdf_archive.h" +#include "QMCWaveFunctions/SPOSet.h" + +namespace qmcplusplus +{ +/** atomic basisset builder + * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet + * + * Reimplement AtomiSPOSetBuilder.h + */ +template +class AOBasisBuilderT : public MPIObjectBase +{ +public: + enum + { + DONOT_EXPAND = 0, + GAUSSIAN_EXPAND = 1, + NATURAL_EXPAND, + CARTESIAN_EXPAND, + MOD_NATURAL_EXPAND, + DIRAC_CARTESIAN_EXPAND + }; + +private: + bool addsignforM; + int expandlm; + std::string Morder; + std::string sph; + std::string basisType; + std::string elementType; + std::string Normalized; + + ///map for the radial orbitals + std::map RnlID; + + ///map for (n,l,m,s) to its quantum number index + std::map nlms_id; + +public: + AOBasisBuilderT(const std::string& eName, Communicate* 
comm); + + bool put(xmlNodePtr cur); + bool putH5(hdf_archive& hin); + + SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; } + + std::unique_ptr createAOSet(xmlNodePtr cur); + std::unique_ptr createAOSetH5(hdf_archive& hin); + + int expandYlm(COT* aos, std::vector& all_nl, int expandlm = DONOT_EXPAND); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrection.h b/src/QMCWaveFunctions/LCAO/CuspCorrection.h index 08c2c42219..f89779584a 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrection.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrection.h @@ -20,74 +20,15 @@ #ifndef QMCPLUSPLUS_CUSPCORRECTION_H #define QMCPLUSPLUS_CUSPCORRECTION_H -#include #include "Configuration.h" +#include "QMCWaveFunctions/LCAO/CuspCorrectionT.h" namespace qmcplusplus { -/** - * @brief Cusp correction parameters - * - * From "Scheme for adding electron-nuclear cusps to Gaussian orbitals" Ma, Towler, Drummond, and Needs - * JCP 122, 224322 (2005) - * - * Equations 7 and 8 in the paper define the correction. These are the parameters in those equations. 
- */ - -struct CuspCorrectionParameters -{ - using ValueType = QMCTraits::ValueType; - using RealType = QMCTraits::RealType; - - /// The cutoff radius - RealType Rc; - - /// A shift to keep correction to a single sign - RealType C; - - /// The sign of the wavefunction at the nucleus - RealType sg; - - /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8 - TinyVector alpha; - - /// Flag to indicate the correction should be recalculated - int redo; - - CuspCorrectionParameters() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) {} -}; - -/// Formulas for applying the cusp correction - -class CuspCorrection -{ - using RealType = QMCTraits::RealType; - -public: - inline RealType Rr(RealType r) const { return cparam.sg * std::exp(pr(r)); } - - inline RealType pr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[0] + alpha[1] * r + alpha[2] * r * r + alpha[3] * r * r * r + alpha[4] * r * r * r * r; - } - - inline RealType dpr(RealType r) const - { - auto& alpha = cparam.alpha; - return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + 4.0 * alpha[4] * r * r * r; - } - - inline RealType d2pr(RealType r) const - { - auto& alpha = cparam.alpha; - return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r; - } +using CuspCorrectionParameters = CuspCorrectionParametersT; - CuspCorrection(const CuspCorrectionParameters& param) : cparam(param) {} +using CuspCorrection = CuspCorrectionT; - CuspCorrectionParameters cparam; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h index d4f3208b61..a64c0c178d 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h @@ -14,271 +14,7 @@ #ifndef QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H #define QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTOR_H -#include "LCAOrbitalSet.h" -#include "LCAOrbitalSetWithCorrection.h" -#include 
"CuspCorrection.h" - -class Communicate; -namespace qmcplusplus -{ - -class ParticleSet; -/// Broadcast cusp correction parameters -void broadcastCuspInfo(CuspCorrectionParameters& param, Communicate& Comm, int root); - -class OneMolecularOrbital -{ - using RealType = QMCTraits::RealType; - using ValueType = QMCTraits::ValueType; - using GradType = QMCTraits::GradType; - using ValueVector = OrbitalSetTraits::ValueVector; - using GradVector = OrbitalSetTraits::GradVector; - using SPOSetPtr = SPOSet*; - -public: - RealType phi(RealType r) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateValue(*targetPtcl, 0, val1); - - return val1[curOrb]; - } - - void phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap) - { - TinyVector dr = 0; - dr[0] = r; - - targetPtcl->R[0] = sourcePtcl->R[curCenter]; - targetPtcl->makeMove(0, dr); - Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1); - - val = val1[curOrb]; - grad = grad1[curOrb]; - lap = lap1[curOrb]; - } - - OneMolecularOrbital(ParticleSet* targetP, ParticleSet* sourceP, SPOSetPtr Phi) - : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), curCenter(0) - { - Psi1 = Phi; - int norb = Psi1->getOrbitalSetSize(); - val1.resize(norb); - grad1.resize(norb); - lap1.resize(norb); - } - - void changeOrbital(int centerIdx, int orbIdx) - { - curCenter = centerIdx; - curOrb = orbIdx; - } - -private: - /// Temporary storage for real wavefunction values - ValueVector val1; - GradVector grad1; - ValueVector lap1; - - /// target ParticleSet - ParticleSet* targetPtcl; - /// source ParticleSet - ParticleSet* sourcePtcl; - - /// Index of orbital - int curOrb; - - /// Index of atomic center - int curCenter; - - SPOSetPtr Psi1; -}; - -/// Read cusp correction parameters from XML file -bool readCuspInfo(const std::string& cuspInfoFile, - const std::string& objectName, - int OrbitalSetSize, - Matrix& info); - -/// save cusp correction info to a 
file. -void saveCusp(const std::string& filename, const Matrix& info, const std::string& id); - -/// Divide molecular orbital into atomic S-orbitals on this center (phi), and everything else (eta). -void splitPhiEta(int center, const std::vector& corrCenter, LCAOrbitalSet& phi, LCAOrbitalSet& eta); - -/// Remove S atomic orbitals from all molecular orbitals on all centers. -void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSet& Phi); - -/// Compute the radial part of the corrected wavefunction -void computeRadialPhiBar(ParticleSet* targetP, - ParticleSet* sourceP, - int curOrb_, - int curCenter_, - SPOSet* Phi, - Vector& xgrid, - Vector& rad_orb, - const CuspCorrectionParameters& data); - -using RealType = QMCTraits::RealType; -using ValueType = QMCTraits::ValueType; -using GradType = QMCTraits::GradType; -using ValueVector = OrbitalSetTraits::ValueVector; - -/** Ideal local energy at one point - * @param r input radial distance - * @param Z nuclear charge - * @param beta0 adjustable parameter to make energy continuous at Rc - */ -RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0); - -/** Ideal local energy at a vector of points - * @param pos input vector of radial distances - * @param Z nuclear charge - * @param Rc cutoff radius where the correction meets the actual orbital - * @param ELorigAtRc local energy at Rc. 
beta0 is adjusted to make energy continuous at Rc - * @param ELideal - output the ideal local energy at pos values - */ -void getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal); - -/** Evaluate various orbital quantities that enter as constraints on the correction - * @param valRc orbital value at Rc - * @param gradRc orbital gradient at Rc - * @param lapRc orbital laplacian at Rc - * @param Rc cutoff radius - * @param Z nuclear charge - * @param C offset to keep correction to a single sign - * @param valAtZero orbital value at zero - * @param eta0 value of non-corrected pieces of the orbital at zero - * @param X output - */ -void evalX(RealType valRc, - GradType gradRc, - ValueType lapRc, - RealType Rc, - RealType Z, - RealType C, - RealType valAtZero, - RealType eta0, - TinyVector& X); - -/** Convert constraints to polynomial parameters - * @param X input from evalX - * @param Rc cutoff radius - * @param alpha output the polynomial parameters for the correction - */ -void X2alpha(const TinyVector& X, RealType Rc, TinyVector& alpha); - -/** Effective nuclear charge to keep effective local energy finite at zero - * @param Z nuclear charge - * @param etaAtZero value of non-S orbitals at this center - * @param phiBarAtZero value of corrected orbital at zero - */ -RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero); - -RealType phiBar(const CuspCorrection& cusp, RealType r, OneMolecularOrbital& phiMO); - -/** Compute effective local energy at vector of points - * @param pos input vector of radial distances - * @param Zeff effective charge from getZeff - * @param Rc cutoff radius - * @param originalELatRc Local energy at the center from the uncorrected orbital - * @param cusp cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param ELcurr output local energy at each distance in pos - */ -void getCurrentLocalEnergy(const ValueVector& pos, - 
RealType Zeff, - RealType Rc, - RealType originalELatRc, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValueVector& ELcurr); - -/** Local energy from uncorrected orbital - * @param pos input vector of radial distances - * @param Zeff nuclear charge - * @param Rc cutoff radius - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param ELorig output local energy at each distance in pos - * - * Return is value of local energy at zero. This is the value needed for subsequent computations. - * The routine can be called with an empty vector of positions to get just this value. - */ -RealType getOriginalLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - OneMolecularOrbital& phiMO, - ValueVector& Elorig); - -/** Sum of squares difference between the current and ideal local energies - * This is the objective function to be minimized. - * @param Elcurr current local energy - * @param Elideal ideal local energy - */ -RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal); - - -/** Minimize chi2 with respect to phi at zero for a fixed Rc - * @param cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param Z nuclear charge - * @param eta0 value at zero for parts of the orbital that don't require correction - the non-S-orbitals on this center and all orbitals on other centers - * @param pos vector of radial positions - * @param Elcurr storage for current local energy - * @param Elideal storage for ideal local energy - */ -RealType minimizeForPhiAtZero(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - RealType start_phi0); - - -/** Minimize chi2 with respect to Rc and phi at zero. 
- * @param cusp correction parameters - * @param phiMO uncorrected orbital (S-orbitals on this center only) - * @param Z nuclear charge - * @param Rc_init initial value for Rc - * @param Rc_max maximum value for Rc - * @param eta0 value at zero for parts of the orbital that don't require correction - the non-S-orbitals on this center and all orbitals on other centers - * @param pos vector of radial positions - * @param Elcurr storage for current local energy - * @param Elideal storage for ideal local energy - * - * Output is parameter values in cusp.cparam - */ -void minimizeForRc(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType Rc_init, - RealType Rc_max, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal); - -// Modifies orbital set lcwc -void applyCuspCorrection(const Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSet& lcao, - SoaCuspCorrection& cusp, - const std::string& id); - -void generateCuspInfo(Matrix& info, - const ParticleSet& targetPtcl, - const ParticleSet& sourcePtcl, - const LCAOrbitalSet& lcao, - const std::string& id, - Communicate& Comm); - -} // namespace qmcplusplus +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h" #endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp similarity index 65% rename from src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp rename to src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp index 588f323eff..a827160a4e 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp @@ -6,227 +6,33 @@ // // File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory // Ye Luo, yeluo@anl.gov, Argonne National Laboratory -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy 
McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore +// National Laboratory Jeremy McMinnis, jmcminis@gmail.com, +// University of Illinois at Urbana-Champaign Mark A. +// Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory // // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory ////////////////////////////////////////////////////////////////////////////////////// +#include "CuspCorrectionConstructionT.h" -#include "CuspCorrectionConstruction.h" #include "Message/Communicate.h" -#include "SoaCuspCorrectionBasisSet.h" -#include "Utilities/FairDivide.h" -#include "SoaLocalizedBasisSet.h" -#include "SoaAtomicBasisSet.h" #include "MultiQuinticSpline1D.h" #include "Numerics/MinimizeOneDim.h" #include "OhmmsData/AttributeSet.h" - +#include "SoaAtomicBasisSet.h" +#include "SoaCuspCorrectionBasisSet.h" +#include "SoaLocalizedBasisSet.h" +#include "Utilities/FairDivide.h" namespace qmcplusplus { -bool readCuspInfo(const std::string& cuspInfoFile, - const std::string& objectName, - int OrbitalSetSize, - Matrix& info) +template +void CuspCorrectionConstructionT::splitPhiEta(int center, + const std::vector& corrCenter, + LCAOrbitalSetT& Phi, + LCAOrbitalSetT& Eta) { - bool success = true; - int ncenter = info.rows(); - app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl; - Libxml2Document adoc; - if (!adoc.parse(cuspInfoFile)) - { - app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; - app_log() << "Recalculating data.\n"; - return false; - } - xmlNodePtr head = adoc.getRoot(); - head = head->children; - xmlNodePtr cur = NULL, ctr; - while (head != NULL) - { - std::string cname(getNodeName(head)); - if (cname == "sposet") - { - std::string name; - OhmmsAttributeSet spoAttrib; - spoAttrib.add(name, "name"); - spoAttrib.put(head); - if (name 
== objectName) - { - cur = head; - break; - } - } - head = head->next; - } - if (cur == NULL) - { - app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; - app_log() << "Recalculating data.\n"; - return false; - } - else - { - app_log() << "Found precomputed cusp data for spo set: " << objectName << std::endl; - } - cur = cur->children; - while (cur != NULL) - { - std::string cname(getNodeName(cur)); - if (cname == "center") - { - int num = -1; - OhmmsAttributeSet Attrib; - Attrib.add(num, "num"); - Attrib.put(cur); - if (num < 0 || num >= ncenter) - { - APP_ABORT("Error with cusp info xml block. incorrect center number. \n"); - } - ctr = cur->children; - while (ctr != NULL) - { - std::string cname(getNodeName(ctr)); - if (cname == "orbital") - { - int orb = -1; - OhmmsAttributeSet orbAttrib; - QMCTraits::RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; - orbAttrib.add(orb, "num"); - orbAttrib.add(a1, "redo"); - orbAttrib.add(a2, "C"); - orbAttrib.add(a3, "sg"); - orbAttrib.add(a4, "rc"); - orbAttrib.add(a5, "a1"); - orbAttrib.add(a6, "a2"); - orbAttrib.add(a7, "a3"); - orbAttrib.add(a8, "a4"); - orbAttrib.add(a9, "a5"); - orbAttrib.put(ctr); - if (orb < OrbitalSetSize) - { - info(num, orb).redo = a1; - info(num, orb).C = a2; - info(num, orb).sg = a3; - info(num, orb).Rc = a4; - info(num, orb).alpha[0] = a5; - info(num, orb).alpha[1] = a6; - info(num, orb).alpha[2] = a7; - info(num, orb).alpha[3] = a8; - info(num, orb).alpha[4] = a9; - } - } - ctr = ctr->next; - } - } - cur = cur->next; - } - return success; -} - -void saveCusp(const std::string& filename, const Matrix& info, const std::string& id) -{ - const int num_centers = info.rows(); - const int orbital_set_size = info.cols(); - xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); - xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); - xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet"); - xmlNewProp(spo, (const xmlChar*)"name", (const 
xmlChar*)id.c_str()); - xmlAddChild(cuspRoot, spo); - xmlDocSetRootElement(doc, cuspRoot); - - for (int center_idx = 0; center_idx < num_centers; center_idx++) - { - xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center"); - std::ostringstream num; - num << center_idx; - xmlNewProp(ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str()); - - for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) - { - std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5; - xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital"); - num0 << mo_idx; - xmlNewProp(orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str()); - - - C.setf(std::ios::scientific, std::ios::floatfield); - C.precision(14); - C << info(center_idx, mo_idx).C; - sg.setf(std::ios::scientific, std::ios::floatfield); - sg.precision(14); - sg << info(center_idx, mo_idx).sg; - rc.setf(std::ios::scientific, std::ios::floatfield); - rc.precision(14); - rc << info(center_idx, mo_idx).Rc; - a1.setf(std::ios::scientific, std::ios::floatfield); - a1.precision(14); - a1 << info(center_idx, mo_idx).alpha[0]; - a2.setf(std::ios::scientific, std::ios::floatfield); - a2.precision(14); - a2 << info(center_idx, mo_idx).alpha[1]; - a3.setf(std::ios::scientific, std::ios::floatfield); - a3.precision(14); - a3 << info(center_idx, mo_idx).alpha[2]; - a4.setf(std::ios::scientific, std::ios::floatfield); - a4.precision(14); - a4 << info(center_idx, mo_idx).alpha[3]; - a5.setf(std::ios::scientific, std::ios::floatfield); - a5.precision(14); - a5 << info(center_idx, mo_idx).alpha[4]; - xmlNewProp(orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a3", (const 
xmlChar*)a3.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str()); - xmlNewProp(orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str()); - xmlAddChild(ctr, orb); - } - xmlAddChild(spo, ctr); - } - - app_log() << "Saving resulting cusp Info xml block to: " << filename << std::endl; - xmlSaveFormatFile(filename.c_str(), doc, 1); - xmlFreeDoc(doc); -} - -void broadcastCuspInfo(CuspCorrectionParameters& param, Communicate& Comm, int root) -{ -#ifdef HAVE_MPI - std::vector buffer(9); - buffer[0] = param.Rc; - buffer[1] = param.C; - buffer[2] = param.sg; - buffer[3] = param.alpha[0]; - buffer[4] = param.alpha[1]; - buffer[5] = param.alpha[2]; - buffer[6] = param.alpha[3]; - buffer[7] = param.alpha[4]; - buffer[8] = param.redo; - - Comm.comm.broadcast(buffer.begin(), buffer.end(), root); - - param.Rc = buffer[0]; - param.C = buffer[1]; - param.sg = buffer[2]; - param.alpha[0] = buffer[3]; - param.alpha[1] = buffer[4]; - param.alpha[2] = buffer[5]; - param.alpha[3] = buffer[6]; - param.alpha[4] = buffer[7]; - param.redo = buffer[8] == 0.0 ? 
0 : 1; -#endif -} - -void splitPhiEta(int center, const std::vector& corrCenter, LCAOrbitalSet& Phi, LCAOrbitalSet& Eta) -{ - using RealType = QMCTraits::RealType; - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); std::vector correct_this_center(corrCenter.size(), false); correct_this_center[center] = corrCenter[center]; @@ -242,21 +48,20 @@ void splitPhiEta(int center, const std::vector& corrCenter, LCAOrbitalSet& { auto& cref(*(Eta.C)); for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; //Eta->C(k,i) = 0.0; + cref(k, i) = 0.0; // Eta->C(k,i) = 0.0; } else { auto& cref(*(Phi.C)); for (int k = 0; k < nOrbs; k++) - cref(k, i) = 0.0; //Phi->C(k,i) = 0.0; + cref(k, i) = 0.0; // Phi->C(k,i) = 0.0; } } } -void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSet& Phi) +template +void CuspCorrectionConstructionT::removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSetT& Phi) { - using RealType = QMCTraits::RealType; - std::vector is_s_orbital(Phi.myBasisSet->BasisSetSize, false); Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital); @@ -275,20 +80,21 @@ void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSet& Phi } } - -// Will be the corrected value for r < rc and the original wavefunction for r > rc -void computeRadialPhiBar(ParticleSet* targetP, - ParticleSet* sourceP, - int curOrb_, - int curCenter_, - SPOSet* Phi, - Vector& xgrid, - Vector& rad_orb, - const CuspCorrectionParameters& data) +// Will be the corrected value for r < rc and the original wavefunction for r > +// rc +template +void CuspCorrectionConstructionT::computeRadialPhiBar(ParticleSetT* targetP, + ParticleSetT* sourceP, + int curOrb_, + int curCenter_, + SPOSetT* Phi, + Vector& xgrid, + Vector& rad_orb, + const CuspCorrectionParametersT& data) { - OneMolecularOrbital phiMO(targetP, sourceP, Phi); + OneMolecularOrbitalT phiMO(targetP, sourceP, Phi); phiMO.changeOrbital(curCenter_, curOrb_); - CuspCorrection cusp(data); + CuspCorrectionT 
cusp(data); for (int i = 0; i < xgrid.size(); i++) { @@ -296,11 +102,12 @@ void computeRadialPhiBar(ParticleSet* targetP, } } -using RealType = QMCTraits::RealType; - // Get the ideal local energy at one point // Eq. 17 in the paper. Coefficients are taken from the paper. -RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getOneIdealLocalEnergy(RealType r, + RealType Z, + RealType beta0) { RealType beta[7] = {3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692}; RealType idealEL = beta0; @@ -314,7 +121,12 @@ RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0) } // Get the ideal local energy for a vector of positions -void getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal) +template +void CuspCorrectionConstructionT::getIdealLocalEnergy(const ValueVector& pos, + RealType Z, + RealType Rc, + RealType ELorigAtRc, + ValueVector& ELideal) { // assert(pos.size() == ELideal.size() RealType beta0 = 0.0; @@ -327,15 +139,16 @@ void getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc, RealTy } // Evaluate constraints. Equations 9-13 in the paper. -void evalX(RealType valRc, - GradType gradRc, - ValueType lapRc, - RealType Rc, - RealType Z, - RealType C, - RealType valAtZero, - RealType eta0, - TinyVector& X) +template +void CuspCorrectionConstructionT::evalX(RealType valRc, + GradType gradRc, + ValueType lapRc, + RealType Rc, + RealType Z, + RealType C, + RealType valAtZero, + RealType eta0, + TinyVector& X) { X[0] = std::log(std::abs(valRc - C)); X[1] = gradRc[0] / (valRc - C); @@ -345,7 +158,10 @@ void evalX(RealType valRc, } // Compute polynomial coefficients from constraints. Eq. 14 in the paper. 
-void X2alpha(const TinyVector& X, RealType Rc, TinyVector& alpha) +template +void CuspCorrectionConstructionT::X2alpha(const TinyVector& X, + RealType Rc, + TinyVector& alpha) { RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv; alpha[0] = X[4]; @@ -359,9 +175,18 @@ void X2alpha(const TinyVector& X, RealType Rc, TinyVector +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getZeff(RealType Z, + RealType etaAtZero, + RealType phiBarAtZero) +{ + return Z * (1.0 + etaAtZero / phiBarAtZero); +} -RealType phiBar(const CuspCorrection& cusp, RealType r, OneMolecularOrbital& phiMO) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::phiBar(const CuspCorrectionT& cusp, + RealType r, + OneMolecularOrbitalT& phiMO) { if (r <= cusp.cparam.Rc) return cusp.cparam.C + cusp.Rr(r); @@ -371,13 +196,14 @@ RealType phiBar(const CuspCorrection& cusp, RealType r, OneMolecularOrbital& phi // Compute the effective one-electron local energy at a vector of points. // Eq. 15 in the paper for r < Rc. Normal local energy for R > Rc. 
-void getCurrentLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - RealType originalELatRc, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValueVector& ELcurr) +template +void CuspCorrectionConstructionT::getCurrentLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + RealType originalELatRc, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValueVector& ELcurr) { // assert(pos.size() == ELcurr.size()); ValueType val; @@ -405,11 +231,13 @@ void getCurrentLocalEnergy(const ValueVector& pos, } // Return value is local energy at Rc -RealType getOriginalLocalEnergy(const ValueVector& pos, - RealType Zeff, - RealType Rc, - OneMolecularOrbital& phiMO, - ValueVector& ELorig) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getOriginalLocalEnergy( + const ValueVector& pos, + RealType Zeff, + RealType Rc, + OneMolecularOrbitalT& phiMO, + ValueVector& ELorig) { // assert(pos.size() == ELorig.size()); @@ -427,9 +255,12 @@ RealType getOriginalLocalEnergy(const ValueVector& pos, return -0.5 * lap / val - Zeff / Rc; } -// Sum of squares difference between the current local energy and the ideal local energy. +// Sum of squares difference between the current local energy and the ideal +// local energy. // This is the objective function to minimize. -RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::getELchi2(const ValueVector& ELcurr, + const ValueVector& ELideal) { assert(ELcurr.size() == ELideal.size()); @@ -442,24 +273,19 @@ RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal) return chi2; } -struct ValGradLap -{ - ValueType val; - GradType grad; - ValueType lap; -}; - // Compute the chi squared distance given a value for phi at zero. 
-RealType evaluateForPhi0Body(RealType phi0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - ValGradLap phiAtRc, - RealType etaAtZero, - RealType ELorigAtRc, - RealType Z) +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::evaluateForPhi0Body( + RealType phi0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValGradLap phiAtRc, + RealType etaAtZero, + RealType ELorigAtRc, + RealType Z) { cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0; cusp.cparam.C = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0; @@ -472,16 +298,18 @@ RealType evaluateForPhi0Body(RealType phi0, return chi2; } -// Optimize free parameter (value of phi at zero) to minimize distance to ideal local energy. -// Output is return value and parameter values are in cusp.cparam -RealType minimizeForPhiAtZero(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal, - RealType start_phi0) +// Optimize free parameter (value of phi at zero) to minimize distance to ideal +// local energy. Output is return value and parameter values are in cusp.cparam +template +typename CuspCorrectionConstructionT::RealType CuspCorrectionConstructionT::minimizeForPhiAtZero( + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + RealType start_phi0) { ValGradLap vglAtRc; ValueVector tmp_pos(0); @@ -517,19 +345,19 @@ RealType minimizeForPhiAtZero(CuspCorrection& cusp, return min_res.second; } - -// Optimize the cutoff radius. There is an inner loop optimizing for phi0 for each value of Rc. 
-// Elcurr and ELideal are expected to have the correct size on input (same size as pos) -// Output is parameter values in cusp.cparam -void minimizeForRc(CuspCorrection& cusp, - OneMolecularOrbital& phiMO, - RealType Z, - RealType Rc_init, - RealType Rc_max, - RealType eta0, - ValueVector& pos, - ValueVector& ELcurr, - ValueVector& ELideal) +// Optimize the cutoff radius. There is an inner loop optimizing for phi0 for +// each value of Rc. Elcurr and ELideal are expected to have the correct size on +// input (same size as pos) Output is parameter values in cusp.cparam +template +void CuspCorrectionConstructionT::minimizeForRc(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType Rc_init, + RealType Rc_max, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal) { Bracket_min_t bracket(Rc_init, 0.0, 0.0, false); RealType start_phi0 = phiMO.phi(0.0); @@ -547,7 +375,6 @@ void minimizeForRc(CuspCorrection& cusp, APP_ABORT("Bracketing minimum failed for finding rc. 
\n"); } - if (bracket.success) { auto min_res = find_minimum( @@ -565,34 +392,34 @@ void minimizeForRc(CuspCorrection& cusp, } // Modifies orbital set lcwc -void applyCuspCorrection(const Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSet& lcao, - SoaCuspCorrection& cusp, - const std::string& id) +template +void CuspCorrectionConstructionT::applyCuspCorrection(const Matrix>& info, + ParticleSetT& targetPtcl, + ParticleSetT& sourcePtcl, + LCAOrbitalSetT& lcao, + SoaCuspCorrectionT& cusp, + const std::string& id) { const int num_centers = info.rows(); const int orbital_set_size = info.cols(); - using RealType = QMCTraits::RealType; + using RealType = typename SPOSetT::RealType; NewTimer& cuspApplyTimer = createGlobalTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium); ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); + LCAOrbitalSetT phi("phi", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); + LCAOrbitalSetT eta("eta", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); std::vector corrCenter(num_centers, "true"); - //What's this grid's lifespan? Why on the heap? + // What's this grid's lifespan? Why on the heap? auto radial_grid = std::make_unique>(); radial_grid->set(0.000001, 100.0, 1001); - Vector xgrid; Vector rad_orb; xgrid.resize(radial_grid->size()); @@ -613,11 +440,11 @@ void applyCuspCorrection(const Matrix& info, // the loop is inside cot - in the multiqunitic auto cot = std::make_unique>(); cot->initializeRadialSet(*radial_grid, orbital_set_size); - //How is this useful? 
- // cot->ID.resize(orbital_set_size); - // for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { - // cot->ID[mo_idx] = mo_idx; - // } + // How is this useful? + // cot->ID.resize(orbital_set_size); + // for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { + // cot->ID[mo_idx] = mo_idx; + // } for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) { @@ -658,16 +485,18 @@ void applyCuspCorrection(const Matrix& info, removeSTypeOrbitals(corrCenter, lcao); } -void generateCuspInfo(Matrix& info, - const ParticleSet& targetPtcl, - const ParticleSet& sourcePtcl, - const LCAOrbitalSet& lcao, - const std::string& id, - Communicate& Comm) +template +void CuspCorrectionConstructionT::generateCuspInfo(Matrix>& info, + const ParticleSetT& targetPtcl, + const ParticleSetT& sourcePtcl, + const LCAOrbitalSetT& lcao, + const std::string& id, + Communicate& Comm) { const int num_centers = info.rows(); const int orbital_set_size = info.cols(); - using RealType = QMCTraits::RealType; + using RealType = typename SPOSetT::RealType; + using ValueVector = typename SPOSetT::ValueVector; NewTimer& cuspCreateTimer = createGlobalTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium); NewTimer& splitPhiEtaTimer = createGlobalTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine); @@ -675,13 +504,12 @@ void generateCuspInfo(Matrix& info, ScopedTimer createCuspTimerWrapper(cuspCreateTimer); - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); + LCAOrbitalSetT phi("phi", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); + LCAOrbitalSetT eta("eta", std::unique_ptr::basis_type>(lcao.myBasisSet->makeClone())); eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); - std::vector corrCenter(num_centers, "true"); using GridType = OneDimGridBase; @@ -695,20 +523,22 @@ void generateCuspInfo(Matrix& info, 
int end_mo = offset[Comm.rank() + 1]; app_log() << " Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl; -// Specify dynamic scheduling explicitly for load balancing. Each iteration should take enough -// time that scheduling overhead is not an issue. +// Specify dynamic scheduling explicitly for load balancing. Each iteration +// should take enough time that scheduling overhead is not an issue. #pragma omp parallel for schedule(dynamic) collapse(2) for (int center_idx = 0; center_idx < num_centers; center_idx++) { for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) { - ParticleSet localTargetPtcl(targetPtcl); - ParticleSet localSourcePtcl(sourcePtcl); + ParticleSetT localTargetPtcl(targetPtcl); + ParticleSetT localSourcePtcl(sourcePtcl); - LCAOrbitalSet local_phi("local_phi", std::unique_ptr(phi.myBasisSet->makeClone())); + LCAOrbitalSetT local_phi("local_phi", + std::unique_ptr::basis_type>(phi.myBasisSet->makeClone())); local_phi.setOrbitalSetSize(phi.getOrbitalSetSize()); - LCAOrbitalSet local_eta("local_eta", std::unique_ptr(eta.myBasisSet->makeClone())); + LCAOrbitalSetT local_eta("local_eta", + std::unique_ptr::basis_type>(eta.myBasisSet->makeClone())); local_eta.setOrbitalSetSize(eta.getOrbitalSetSize()); #pragma omp critical @@ -735,10 +565,10 @@ void generateCuspInfo(Matrix& info, if (corrO) { - OneMolecularOrbital etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta); + OneMolecularOrbitalT etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta); etaMO.changeOrbital(center_idx, mo_idx); - OneMolecularOrbital phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi); + OneMolecularOrbitalT phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi); phiMO.changeOrbital(center_idx, mo_idx); SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet()); @@ -759,14 +589,14 @@ void generateCuspInfo(Matrix& info, RealType eta0 = etaMO.phi(0.0); ValueVector ELorig(npts); - CuspCorrection cusp(info(center_idx, mo_idx)); + 
CuspCorrectionT cusp(info(center_idx, mo_idx)); { ScopedTimer local_timer(computeTimer); minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal); } - // Update shared object. Each iteration accesses a different element and - // this is an array (no bookkeeping data to update), so no synchronization - // is necessary. + // Update shared object. Each iteration accesses a different + // element and this is an array (no bookkeeping data to update), + // so no synchronization is necessary. info(center_idx, mo_idx) = cusp.cparam; } } @@ -786,4 +616,211 @@ void generateCuspInfo(Matrix& info, } } +template +void CuspCorrectionConstructionT::broadcastCuspInfo(CuspCorrectionParametersT& param, Communicate& Comm, int root) +{ +#ifdef HAVE_MPI + std::vector buffer(9); + buffer[0] = param.Rc; + buffer[1] = param.C; + buffer[2] = param.sg; + buffer[3] = param.alpha[0]; + buffer[4] = param.alpha[1]; + buffer[5] = param.alpha[2]; + buffer[6] = param.alpha[3]; + buffer[7] = param.alpha[4]; + buffer[8] = param.redo; + + Comm.comm.broadcast(buffer.begin(), buffer.end(), root); + + param.Rc = buffer[0]; + param.C = buffer[1]; + param.sg = buffer[2]; + param.alpha[0] = buffer[3]; + param.alpha[1] = buffer[4]; + param.alpha[2] = buffer[5]; + param.alpha[3] = buffer[6]; + param.alpha[4] = buffer[7]; + param.redo = buffer[8] == 0.0 ? 
0 : 1; +#endif +} + +template +bool CuspCorrectionConstructionT::readCuspInfo(const std::string& cuspInfoFile, + const std::string& objectName, + int OrbitalSetSize, + Matrix>& info) +{ + bool success = true; + int ncenter = info.rows(); + app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl; + Libxml2Document adoc; + if (!adoc.parse(cuspInfoFile)) + { + app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; + app_log() << "Recalculating data.\n"; + return false; + } + xmlNodePtr head = adoc.getRoot(); + head = head->children; + xmlNodePtr cur = NULL, ctr; + while (head != NULL) + { + std::string cname(getNodeName(head)); + if (cname == "sposet") + { + std::string name; + OhmmsAttributeSet spoAttrib; + spoAttrib.add(name, "name"); + spoAttrib.put(head); + if (name == objectName) + { + cur = head; + break; + } + } + head = head->next; + } + if (cur == NULL) + { + app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl; + app_log() << "Recalculating data.\n"; + return false; + } + else + { + app_log() << "Found precomputed cusp data for spo set: " << objectName << std::endl; + } + cur = cur->children; + while (cur != NULL) + { + std::string cname(getNodeName(cur)); + if (cname == "center") + { + int num = -1; + OhmmsAttributeSet Attrib; + Attrib.add(num, "num"); + Attrib.put(cur); + if (num < 0 || num >= ncenter) + { + APP_ABORT("Error with cusp info xml block. incorrect center " + "number. 
\n"); + } + ctr = cur->children; + while (ctr != NULL) + { + std::string cname(getNodeName(ctr)); + if (cname == "orbital") + { + int orb = -1; + OhmmsAttributeSet orbAttrib; + RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9; + orbAttrib.add(orb, "num"); + orbAttrib.add(a1, "redo"); + orbAttrib.add(a2, "C"); + orbAttrib.add(a3, "sg"); + orbAttrib.add(a4, "rc"); + orbAttrib.add(a5, "a1"); + orbAttrib.add(a6, "a2"); + orbAttrib.add(a7, "a3"); + orbAttrib.add(a8, "a4"); + orbAttrib.add(a9, "a5"); + orbAttrib.put(ctr); + if (orb < OrbitalSetSize) + { + info(num, orb).redo = a1; + info(num, orb).C = a2; + info(num, orb).sg = a3; + info(num, orb).Rc = a4; + info(num, orb).alpha[0] = a5; + info(num, orb).alpha[1] = a6; + info(num, orb).alpha[2] = a7; + info(num, orb).alpha[3] = a8; + info(num, orb).alpha[4] = a9; + } + } + ctr = ctr->next; + } + } + cur = cur->next; + } + return success; +} + +template +void CuspCorrectionConstructionT::saveCusp(const std::string& filename, + const Matrix>& info, + const std::string& id) +{ + const int num_centers = info.rows(); + const int orbital_set_size = info.cols(); + xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); + xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); + xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet"); + xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str()); + xmlAddChild(cuspRoot, spo); + xmlDocSetRootElement(doc, cuspRoot); + + for (int center_idx = 0; center_idx < num_centers; center_idx++) + { + xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center"); + std::ostringstream num; + num << center_idx; + xmlNewProp(ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str()); + + for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) + { + std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5; + xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital"); + num0 << mo_idx; + xmlNewProp(orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str()); + + 
C.setf(std::ios::scientific, std::ios::floatfield); + C.precision(14); + C << info(center_idx, mo_idx).C; + sg.setf(std::ios::scientific, std::ios::floatfield); + sg.precision(14); + sg << info(center_idx, mo_idx).sg; + rc.setf(std::ios::scientific, std::ios::floatfield); + rc.precision(14); + rc << info(center_idx, mo_idx).Rc; + a1.setf(std::ios::scientific, std::ios::floatfield); + a1.precision(14); + a1 << info(center_idx, mo_idx).alpha[0]; + a2.setf(std::ios::scientific, std::ios::floatfield); + a2.precision(14); + a2 << info(center_idx, mo_idx).alpha[1]; + a3.setf(std::ios::scientific, std::ios::floatfield); + a3.precision(14); + a3 << info(center_idx, mo_idx).alpha[2]; + a4.setf(std::ios::scientific, std::ios::floatfield); + a4.precision(14); + a4 << info(center_idx, mo_idx).alpha[3]; + a5.setf(std::ios::scientific, std::ios::floatfield); + a5.precision(14); + a5 << info(center_idx, mo_idx).alpha[4]; + xmlNewProp(orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str()); + xmlNewProp(orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str()); + xmlAddChild(ctr, orb); + } + xmlAddChild(spo, ctr); + } + + app_log() << "Saving resulting cusp Info xml block to: " << filename << std::endl; + xmlSaveFormatFile(filename.c_str(), doc, 1); + xmlFreeDoc(doc); +} + +#ifndef MIXED_PRECISION +template class CuspCorrectionConstructionT; +#else +template class CuspCorrectionConstructionT; +#endif + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h 
b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h new file mode 100644 index 0000000000..b96b4d4e40 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h @@ -0,0 +1,328 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTORT_H +#define QMCPLUSPLUS_CUSP_CORRECTION_CONSTRUCTORT_H + +#include "CuspCorrectionT.h" +#include "LCAOrbitalSetT.h" +#include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "SoaCuspCorrectionT.h" + +class Communicate; +namespace qmcplusplus +{ + +template +class ParticleSetT; + +template +class OneMolecularOrbitalT +{ +public: + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using GradType = typename OrbitalSetTraits::GradType; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using GradVector = typename OrbitalSetTraits::GradVector; + using SPOSetPtr = SPOSetT*; + + ValueType phi(RealType r) + { + TinyVector dr = 0; + dr[0] = r; + + targetPtcl->R[0] = sourcePtcl->R[curCenter]; + targetPtcl->makeMove(0, dr); + Psi1->evaluateValue(*targetPtcl, 0, val1); + + return val1[curOrb]; + } + + void phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap) + { + TinyVector dr = 0; + dr[0] = r; + + targetPtcl->R[0] = sourcePtcl->R[curCenter]; + targetPtcl->makeMove(0, dr); + Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1); + + val = val1[curOrb]; + 
grad = grad1[curOrb]; + lap = lap1[curOrb]; + } + + OneMolecularOrbitalT(ParticleSetT* targetP, ParticleSetT* sourceP, SPOSetPtr Phi) + : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), curCenter(0) + { + Psi1 = Phi; + int norb = Psi1->getOrbitalSetSize(); + val1.resize(norb); + grad1.resize(norb); + lap1.resize(norb); + } + + void changeOrbital(int centerIdx, int orbIdx) + { + curCenter = centerIdx; + curOrb = orbIdx; + } + +private: + /// Temporary storage for real wavefunction values + ValueVector val1; + GradVector grad1; + ValueVector lap1; + + /// target ParticleSet + ParticleSetT* targetPtcl; + /// source ParticleSet + ParticleSetT* sourcePtcl; + + /// Index of orbital + int curOrb; + + /// Index of atomic center + int curCenter; + + SPOSetPtr Psi1; +}; + +template +class CuspCorrectionConstructionT +{ +public: + using RealType = typename OrbitalSetTraits::RealType; + using ValueType = typename OrbitalSetTraits::ValueType; + using ValueVector = typename OrbitalSetTraits::ValueVector; + using GradType = typename OrbitalSetTraits::GradType; + using GradVector = typename OrbitalSetTraits::GradVector; + + struct ValGradLap + { + ValueType val; + GradType grad; + ValueType lap; + }; + + /// Divide molecular orbital into atomic S-orbitals on this center (phi), + /// and everything else (eta). + static void splitPhiEta(int center, + const std::vector& corrCenter, + LCAOrbitalSetT& phi, + LCAOrbitalSetT& eta); + + /// Remove S atomic orbitals from all molecular orbitals on all centers. 
+ static void removeSTypeOrbitals(const std::vector& corrCenter, LCAOrbitalSetT& Phi); + + /// Compute the radial part of the corrected wavefunction + static void computeRadialPhiBar(ParticleSetT* targetP, + ParticleSetT* sourceP, + int curOrb_, + int curCenter_, + SPOSetT* Phi, + Vector& xgrid, + Vector& rad_orb, + const CuspCorrectionParametersT& data); + + /** Ideal local energy at one point + * @param r input radial distance + * @param Z nuclear charge + * @param beta0 adjustable parameter to make energy continuous at Rc + */ + static RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0); + + /** Ideal local energy at a vector of points + * @param pos input vector of radial distances + * @param Z nuclear charge + * @param Rc cutoff radius where the correction meets the actual orbital + * @param ELorigAtRc local energy at Rc. beta0 is adjusted to make energy + * continuous at Rc + * @param ELideal - output the ideal local energy at pos values + */ + static void getIdealLocalEnergy(const ValueVector& pos, + RealType Z, + RealType Rc, + RealType ELorigAtRc, + ValueVector& ELideal); + + /** Evaluate various orbital quantities that enter as constraints on the + * correction + * @param valRc orbital value at Rc + * @param gradRc orbital gradient at Rc + * @param lapRc orbital laplacian at Rc + * @param Rc cutoff radius + * @param Z nuclear charge + * @param C offset to keep correction to a single sign + * @param valAtZero orbital value at zero + * @param eta0 value of non-corrected pieces of the orbital at zero + * @param X output + */ + static void evalX(RealType valRc, + GradType gradRc, + ValueType lapRc, + RealType Rc, + RealType Z, + RealType C, + RealType valAtZero, + RealType eta0, + TinyVector& X); + + /** Convert constraints to polynomial parameters + * @param X input from evalX + * @param Rc cutoff radius + * @param alpha output the polynomial parameters for the correction + */ + static void X2alpha(const TinyVector& X, RealType Rc, 
TinyVector& alpha); + + /** Effective nuclear charge to keep effective local energy finite at zero + * @param Z nuclear charge + * @param etaAtZero value of non-S orbitals at this center + * @param phiBarAtZero value of corrected orbital at zero + */ + static RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero); + + static RealType phiBar(const CuspCorrectionT& cusp, RealType r, OneMolecularOrbitalT& phiMO); + + /** Compute effective local energy at vector of points + * @param pos input vector of radial distances + * @param Zeff effective charge from getZeff + * @param Rc cutoff radius + * @param originalELatRc Local energy at the center from the uncorrected + * orbital + * @param cusp cusp correction parameters + * @param phiMO uncorrected orbital (S-orbitals on this center only) + * @param ELcurr output local energy at each distance in pos + */ + static void getCurrentLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + RealType originalELatRc, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValueVector& ELcurr); + + /** Local energy from uncorrected orbital + * @param pos input vector of radial distances + * @param Zeff nuclear charge + * @param Rc cutoff radius + * @param phiMO uncorrected orbital (S-orbitals on this center only) + * @param ELorig output local energy at each distance in pos + * + * Return is value of local energy at zero. This is the value needed for + * subsequent computations. The routine can be called with an empty vector + * of positions to get just this value. + */ + static RealType getOriginalLocalEnergy(const ValueVector& pos, + RealType Zeff, + RealType Rc, + OneMolecularOrbitalT& phiMO, + ValueVector& Elorig); + + /** Sum of squares difference between the current and ideal local energies + * This is the objective function to be minimized. 
+ * @param ELcurr current local energy + * @param ELideal ideal local energy + */ + static RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal); + + /** Minimize chi2 with respect to phi at zero for a fixed Rc + * @param cusp correction parameters + * @param phiMO uncorrected orbital (S-orbitals on this center only) + * @param Z nuclear charge + * @param eta0 value at zero for parts of the orbital that don't require + * correction - the non-S-orbitals on this center and all orbitals on other + * centers + * @param pos vector of radial positions + * @param ELcurr storage for current local energy + * @param ELideal storage for ideal local energy + */ + static RealType minimizeForPhiAtZero(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + RealType start_phi0); + + /** Minimize chi2 with respect to Rc and phi at zero. + * @param cusp correction parameters + * @param phiMO uncorrected orbital (S-orbitals on this center only) + * @param Z nuclear charge + * @param Rc_init initial value for Rc + * @param Rc_max maximum value for Rc + * @param eta0 value at zero for parts of the orbital that don't require + * correction - the non-S-orbitals on this center and all orbitals on other + * centers + * @param pos vector of radial positions + * @param ELcurr storage for current local energy + * @param ELideal storage for ideal local energy + * + * Output is parameter values in cusp.cparam + */ + static void minimizeForRc(CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + RealType Z, + RealType Rc_init, + RealType Rc_max, + RealType eta0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal); + + // Modifies orbital set lcwc + static void applyCuspCorrection(const Matrix>& info, + ParticleSetT& targetPtcl, + ParticleSetT& sourcePtcl, + LCAOrbitalSetT& lcao, + SoaCuspCorrectionT& cusp, + const std::string& id); + + static void 
generateCuspInfo(Matrix>& info, + const ParticleSetT& targetPtcl, + const ParticleSetT& sourcePtcl, + const LCAOrbitalSetT& lcao, + const std::string& id, + Communicate& Comm); + + /// Broadcast cusp correction parameters + static void broadcastCuspInfo(CuspCorrectionParametersT& param, Communicate& Comm, int root); + + /// Read cusp correction parameters from XML file + static bool readCuspInfo(const std::string& cuspInfoFile, + const std::string& objectName, + int OrbitalSetSize, + Matrix>& info); + + /// save cusp correction info to a file. + static void saveCusp(const std::string& filename, + const Matrix>& info, + const std::string& id); + +private: + static RealType evaluateForPhi0Body(RealType phi0, + ValueVector& pos, + ValueVector& ELcurr, + ValueVector& ELideal, + CuspCorrectionT& cusp, + OneMolecularOrbitalT& phiMO, + ValGradLap phiAtRc, + RealType etaAtZero, + RealType ELorigAtRc, + RealType Z); +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h new file mode 100644 index 0000000000..f98fd819fa --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h @@ -0,0 +1,98 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +/** @file CuspCorrectionT.h + * @brief Corrections to electron-nucleus cusp for all-electron molecular + * calculations. + */ + +#ifndef QMCPLUSPLUS_CUSPCORRECTIONT_H +#define QMCPLUSPLUS_CUSPCORRECTIONT_H + +#include "Configuration.h" +#include "OrbitalSetTraits.h" + +#include + +namespace qmcplusplus +{ +/** + * @brief Cusp correction parameters + * + * From "Scheme for adding electron-nuclear cusps to Gaussian orbitals" Ma, + * Towler, Drummond, and Needs JCP 122, 224322 (2005) + * + * Equations 7 and 8 in the paper define the correction. These are the + * parameters in those equations. + */ + +template +struct CuspCorrectionParametersT +{ + using ValueType = typename OrbitalSetTraits::ValueType; + using RealType = typename OrbitalSetTraits::RealType; + + /// The cutoff radius + RealType Rc; + + /// A shift to keep correction to a single sign + RealType C; + + /// The sign of the wavefunction at the nucleus + RealType sg; + + /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8 + TinyVector alpha; + + /// Flag to indicate the correction should be recalculated + int redo; + + CuspCorrectionParametersT() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) {} +}; + +/// Formulas for applying the cusp correction + +template +class CuspCorrectionT +{ + using RealType = typename OrbitalSetTraits::RealType; + +public: + inline RealType Rr(RealType r) const { return cparam.sg * std::exp(pr(r)); } + + inline RealType pr(RealType r) const + { + auto& alpha = cparam.alpha; + return alpha[0] + alpha[1] * r + alpha[2] * r * r + alpha[3] * r * r * r + alpha[4] * r * r * r * r; + } + + inline RealType dpr(RealType r) const + { + auto& alpha = cparam.alpha; + return 
alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + 4.0 * alpha[4] * r * r * r; + } + + inline RealType d2pr(RealType r) const + { + auto& alpha = cparam.alpha; + return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r; + } + + CuspCorrectionT(const CuspCorrectionParametersT& param) : cparam(param) {} + + CuspCorrectionParametersT cparam; +}; +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h index 4c1d490b8b..0d6ff2118f 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.h @@ -13,51 +13,12 @@ #ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H #define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDER_H -#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" namespace qmcplusplus { -/** @file LCAOSpinorBuidler.h - * - * Derives from LCAOrbitalBuilder.h. Overrides createSPOSetFromXML method to read up and - * down channel from HDF5 and construct SpinorSet - * - */ -class LCAOSpinorBuilder : public LCAOrbitalBuilder -{ -public: - /** constructor - * \param els reference to the electrons - * \param ions reference to the ions - * - * Derives from LCAOrbitalBuilder, but will require an h5_path to be set - */ - LCAOSpinorBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur); - - /** creates and returns SpinorSet - * - * Creates an up and down LCAOrbitalSet - * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file - * registers up and down into a SpinorSet and returns - */ - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -private: - /** load the up and down MO sets - * - * checks to make sure not PBC and initialize the Occ vector. 
- * call putFromH5 to parse the up and down MO coefficients - */ - bool loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur); +using LCAOSpinorBuilder = LCAOSpinorBuilderT; - /** parse h5 file for spinor info - * - * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part of up component of spinor - * assumes the h5 file as KPTS_0/eigenset_1(_imag) for the real/imag part of dn component of spinor - * reads the various coefficient matricies and broadcast - * after this, we have up/dn LCAOrbitalSet that can be registered to the SpinorSet - */ - bool putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr); -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp similarity index 55% rename from src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp rename to src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp index 8906c0f42c..3c333e304c 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp @@ -9,27 +9,33 @@ // File created by: Cody A. 
Melton, cmelton@sandia.gov, Sandia National Laboratories ////////////////////////////////////////////////////////////////////////////////////// -#include "LCAOSpinorBuilder.h" -#include "QMCWaveFunctions/SpinorSet.h" +#include "LCAOSpinorBuilderT.h" + +#include "Message/CommOperators.h" #include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/SpinorSetT.h" #include "Utilities/ProgressReportEngine.h" #include "hdf/hdf_archive.h" -#include "Message/CommOperators.h" namespace qmcplusplus { -LCAOSpinorBuilder::LCAOSpinorBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) - : LCAOrbitalBuilder(els, ions, comm, cur) +template +LCAOSpinorBuilderT::LCAOSpinorBuilderT(ParticleSetT& els, + ParticleSetT& ions, + Communicate* comm, + xmlNodePtr cur) + : LCAOrbitalBuilderT(els, ions, comm, cur) { - ClassName = "LCAOSpinorBuilder"; + this->ClassName = "LCAOSpinorBuilder"; - if (h5_path == "") - myComm->barrier_and_abort("LCAOSpinorBuilder only works with href"); + if (this->h5_path == "") + this->myComm->barrier_and_abort("LCAOSpinorBuilder only works with href"); } -std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) +template +std::unique_ptr> LCAOSpinorBuilderT::createSPOSetFromXML(xmlNodePtr cur) { - ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); std::string spo_name(""), optimize("no"); std::string basisset_name("LCAOBSet"); OhmmsAttributeSet spoAttrib; @@ -39,28 +45,29 @@ std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) spoAttrib.put(cur); BasisSet_t* myBasisSet = nullptr; - if (basisset_map_.find(basisset_name) == basisset_map_.end()) - myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); + if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end()) + this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); else - myBasisSet = 
basisset_map_[basisset_name].get(); + myBasisSet = this->basisset_map_[basisset_name].get(); if (optimize == "yes") app_log() << " SPOSet " << spo_name << " is optimizable\n"; - std::unique_ptr upspo = - std::make_unique(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); - std::unique_ptr dnspo = - std::make_unique(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); + auto upspo = + std::make_unique>(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); + auto dnspo = + std::make_unique>(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); loadMO(*upspo, *dnspo, cur); - //create spinor and register up/dn - auto spinor_set = std::make_unique(spo_name); + // create spinor and register up/dn + auto spinor_set = std::make_unique>(spo_name); spinor_set->set_spos(std::move(upspo), std::move(dnspo)); return spinor_set; } -bool LCAOSpinorBuilder::loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur) +template +bool LCAOSpinorBuilderT::loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur) { bool PBC = false; int norb = up.getBasisSetSize(); @@ -85,23 +92,23 @@ bool LCAOSpinorBuilder::loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur = cur->next; } - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing or incorrect path to H5 file."); + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing " + "or incorrect path to H5 file."); hin.push("PBC"); PBC = false; hin.read(PBC, "PBC"); hin.close(); } - myComm->bcast(PBC); + this->myComm->bcast(PBC); if (PBC) - myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); + this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC"); bool success = putFromH5(up, dn, occ_ptr); - if 
(debugc == "yes") { app_log() << "UP: Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols() @@ -114,28 +121,30 @@ bool LCAOSpinorBuilder::loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr return success; } -bool LCAOSpinorBuilder::putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr occ_ptr) +template +bool LCAOSpinorBuilderT::putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr occ_ptr) { -#ifdef QMC_COMPLEX if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) { - myComm->barrier_and_abort("LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); + this->myComm->barrier_and_abort("LCASpinorBuilder::loadMO detected ZERO BasisSetSize"); return false; } bool success = true; - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { - if (!hin.open(h5_path, H5F_ACC_RDONLY)) - myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or incorrect path to H5 file"); + if (!hin.open(this->h5_path, H5F_ACC_RDONLY)) + this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or " + "incorrect path to H5 file"); Matrix upReal; Matrix upImag; std::string setname = "/Super_Twist/eigenset_0"; - readRealMatrixFromH5(hin, setname, upReal); + this->readRealMatrixFromH5(hin, setname, upReal); setname += "_imag"; - readRealMatrixFromH5(hin, setname, upImag); + this->readRealMatrixFromH5(hin, setname, upImag); + assert(upReal.rows() == upImag.rows()); assert(upReal.cols() == upImag.cols()); @@ -145,16 +154,16 @@ bool LCAOSpinorBuilder::putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodeP { for (int j = 0; j < upTemp.cols(); j++) { - upTemp[i][j] = ValueType(upReal[i][j], upImag[i][j]); + upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]}; } } Matrix dnReal; Matrix dnImag; setname = "/Super_Twist/eigenset_1"; - readRealMatrixFromH5(hin, setname, dnReal); + this->readRealMatrixFromH5(hin, setname, dnReal); setname += "_imag"; - 
readRealMatrixFromH5(hin, setname, dnImag); + this->readRealMatrixFromH5(hin, setname, dnImag); assert(dnReal.rows() == dnImag.rows()); assert(dnReal.cols() == dnImag.cols()); @@ -171,15 +180,15 @@ bool LCAOSpinorBuilder::putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodeP assert(upReal.rows() == dnReal.rows()); assert(upReal.cols() == dnReal.cols()); - Occ.resize(upReal.rows()); - success = putOccupation(up, occ_ptr); + this->Occ.resize(upReal.rows()); + success = this->putOccupation(up, occ_ptr); int norbs = up.getOrbitalSetSize(); int n = 0, i = 0; while (i < norbs) { - if (Occ[n] > 0.0) + if (this->Occ[n] > 0.0) { std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]); std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]); @@ -192,15 +201,18 @@ bool LCAOSpinorBuilder::putFromH5(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodeP } #ifdef HAVE_MPI - myComm->comm.broadcast_n(up.C->data(), up.C->size()); - myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); -#endif - -#else - myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 Must build with QMC_COMPLEX"); + this->myComm->comm.broadcast_n(up.C->data(), up.C->size()); + this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size()); #endif return success; } +#ifdef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class LCAOSpinorBuilderT>; +#else +template class LCAOSpinorBuilderT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h new file mode 100644 index 0000000000..721da148f4 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h @@ -0,0 +1,68 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. +// +// File developed by: Cody A. 
Melton, cmelton@sandia.gov, Sandia National Laboratories +// +// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H +#define QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H + +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" + +namespace qmcplusplus +{ +/** @file LCAOSpinorBuilderT.h + * + * Derives from LCAOrbitalBuilderT.h. Overrides createSPOSetFromXML method to + * read up and down channel from HDF5 and construct SpinorSet + * + */ +template +class LCAOSpinorBuilderT : public LCAOrbitalBuilderT +{ +public: + using BasisSet_t = typename LCAOrbitalBuilderT::BasisSet_t; + using RealType = typename LCAOrbitalBuilderT::RealType; + using ValueType = typename LCAOrbitalBuilderT::ValueType; + + /** constructor + * \param els reference to the electrons + * \param ions reference to the ions + * + * Derives from LCAOrbitalBuilderT, but will require an h5_path to be set + */ + LCAOSpinorBuilderT(ParticleSetT& els, ParticleSetT& ions, Communicate* comm, xmlNodePtr cur); + + /** creates and returns SpinorSet + * + * Creates an up and down LCAOrbitalSet + * calls LCAOSpinorBuilderT::loadMO to build up and down from the H5 file + * registers up and down into a SpinorSet and returns + */ + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + +private: + /** load the up and down MO sets + * + * checks to make sure not PBC and initialize the Occ vector. 
+ * call putFromH5 to parse the up and down MO coefficients + */ + bool loadMO(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr cur); + + /** parse h5 file for spinor info + * + * assumes the h5 file has /Super_Twist/eigenset_0(_imag) for the real/imag + * part of the up component of the spinor, and /Super_Twist/eigenset_1(_imag) + * for the real/imag part of the dn component; reads the coefficient + * matrices and broadcasts them, after which we have up/dn + * LCAOrbitalSets that can be registered to the SpinorSet + */ + bool putFromH5(LCAOrbitalSetT& up, LCAOrbitalSetT& dn, xmlNodePtr); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h index a20a37e302..5522647968 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h @@ -18,107 +18,11 @@ #ifndef QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDER_H #define QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDER_H -#include -#include "QMCWaveFunctions/BasisSetBase.h" -#include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" namespace qmcplusplus { -/** SPOSetBuilder using new LCAOrbitalSet and Soa versions - * - * Reimplement MolecularSPOSetBuilder - * - support both CartesianTensor and SphericalTensor - */ -class LCAOrbitalBuilder : public SPOSetBuilder -{ -public: - using BasisSet_t = LCAOrbitalSet::basis_type; - /** constructor - * \param els reference to the electrons - * \param ions reference to the ions - */ - LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur); - ~LCAOrbitalBuilder() override; - std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) override; - -protected: - ///target ParticleSet - ParticleSet& targetPtcl; - ///source ParticleSet - ParticleSet& sourcePtcl; - /// localized basis set map - std::map> basisset_map_; - 
/// if true, add cusp correction to orbitals - bool cuspCorr; - ///Path to HDF5 Wavefunction - std::string h5_path; - ///Number of periodic Images for Orbital evaluation - TinyVector PBCImages; - ///Coordinates Super Twist - PosType SuperTwist; - ///Periodic Image Phase Factors. Correspond to the phase from the PBCImages. Computed only once. - Vector> PeriodicImagePhaseFactors; - Array> PeriodicImageDisplacements; - ///Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors - Tensor Lattice; - - /// Enable cusp correction - bool doCuspCorrection; - - /** create basis set - * - * Use ao_traits to match (ROT)x(SH) combo - */ - template - BasisSet_t* createBasisSet(xmlNodePtr cur); - template - BasisSet_t* createBasisSetH5(); - - // The following items were previously in SPOSet - ///occupation number - Vector Occ; - bool loadMO(LCAOrbitalSet& spo, xmlNodePtr cur); - bool putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr); - bool putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - bool putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - bool putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr); - // the dimensions of Ctemp are determined by the dataset on file - void LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix>& Ctemp, - bool MultiDet); - // the dimensions of Creal are determined by the dataset on file - void LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, Matrix& Creal, bool Multidet); - void EvalPeriodicImagePhaseFactors( - PosType SuperTwist, - Vector>& LocPeriodicImagePhaseFactors, - Array>& LocPeriodicImageDisplacements); - void EvalPeriodicImagePhaseFactors( - PosType SuperTwist, - Vector, OffloadPinnedAllocator>>& LocPeriodicImagePhaseFactors, - Array>& LocPeriodicImageDisplacements); - /** read matrix from h5 file - * \param[in] hin: hdf5 arhive to be read from - * \param setname: where to read from in hdf5 archive - * \param[out] Creal: matrix read from h5 - * - * added 
in header to allow use from derived class LCAOSpinorBuilder as well - */ - void readRealMatrixFromH5(hdf_archive& hin, - const std::string& setname, - Matrix& Creal) const; - -private: - ///load a basis set from XML input - std::unique_ptr loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); - ///load a basis set from h5 file - std::unique_ptr loadBasisSetFromH5(xmlNodePtr parent); - ///determine radial orbital type based on "keyword" and "transform" attributes - int determineRadialOrbType(xmlNodePtr cur) const; -}; - - +using LCAOrbitalBuilder = LCAOrbitalBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp similarity index 60% rename from src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp rename to src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp index 30dd986c11..84b2b3d9a8 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp @@ -14,39 +14,37 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
////////////////////////////////////////////////////////////////////////////////////// +#include "LCAOrbitalBuilderT.h" -#include "LCAOrbitalBuilder.h" -#include "OhmmsData/AttributeSet.h" -#include "QMCWaveFunctions/SPOSet.h" +#include "AOBasisBuilderT.h" +#include "CPU/math.hpp" +#include "CuspCorrectionConstructionT.h" +#include "LCAOrbitalSetT.h" +#include "LCAOrbitalSetWithCorrectionT.h" +#include "Message/CommOperators.h" +#include "MultiFunctorAdapter.h" #include "MultiQuinticSpline1D.h" #include "Numerics/SoaCartesianTensor.h" #include "Numerics/SoaSphericalTensor.h" -#include "SoaAtomicBasisSet.h" +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "SoaAtomicBasisSetT.h" #include "SoaLocalizedBasisSet.h" -#include "LCAOrbitalSet.h" -#include "AOBasisBuilder.h" -#include "MultiFunctorAdapter.h" -#if !defined(QMC_COMPLEX) -#include "LCAOrbitalSetWithCorrection.h" -#include "CuspCorrectionConstruction.h" -#endif -#include "hdf/hdf_archive.h" -#include "Message/CommOperators.h" #include "Utilities/ProgressReportEngine.h" -#include "CPU/math.hpp" +#include "hdf/hdf_archive.h" #include namespace qmcplusplus { /** traits for a localized basis set; used by createBasisSet - * - * T radial function value type - * ORBT orbital value type, can be complex - * ROT {0=numuerica;, 1=gto; 2=sto} - * SH {0=cartesian, 1=spherical} - * If too confusing, inroduce enumeration. - */ + * + * T radial function value type + * ORBT orbital value type, can be complex + * ROT {0=numuerica;, 1=gto; 2=sto} + * SH {0=cartesian, 1=spherical} + * If too confusing, inroduce enumeration. 
+ */ template struct ao_traits {}; @@ -57,7 +55,7 @@ struct ao_traits { using radial_type = MultiQuinticSpline1D; using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; + using ao_type = SoaAtomicBasisSetT; using basis_type = SoaLocalizedBasisSet; }; @@ -67,7 +65,7 @@ struct ao_traits { using radial_type = MultiQuinticSpline1D; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; + using ao_type = SoaAtomicBasisSetT; using basis_type = SoaLocalizedBasisSet; }; @@ -77,7 +75,7 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaCartesianTensor; - using ao_type = SoaAtomicBasisSet; + using ao_type = SoaAtomicBasisSetT; using basis_type = SoaLocalizedBasisSet; }; @@ -87,7 +85,7 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; + using ao_type = SoaAtomicBasisSetT; using basis_type = SoaLocalizedBasisSet; }; @@ -97,25 +95,26 @@ struct ao_traits { using radial_type = MultiFunctorAdapter>; using angular_type = SoaSphericalTensor; - using ao_type = SoaAtomicBasisSet; + using ao_type = SoaAtomicBasisSetT; using basis_type = SoaLocalizedBasisSet; }; - inline bool is_same(const xmlChar* a, const char* b) { return !strcmp((const char*)a, b); } -using BasisSet_t = LCAOrbitalSet::basis_type; - -LCAOrbitalBuilder::LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Communicate* comm, xmlNodePtr cur) - : SPOSetBuilder("LCAO", comm), +template +LCAOrbitalBuilderT::LCAOrbitalBuilderT(ParticleSetT& els, + ParticleSetT& ions, + Communicate* comm, + xmlNodePtr cur) + : SPOSetBuilderT("LCAO", comm), targetPtcl(els), sourcePtcl(ions), h5_path(""), SuperTwist(0.0), doCuspCorrection(false) { - ClassName = "LCAOrbitalBuilder"; - ReportEngine PRE(ClassName, "createBasisSet"); + this->ClassName = "LCAOrbitalBuilder"; + ReportEngine PRE(this->ClassName, "createBasisSet"); std::string cuspC("no"); // cusp correction 
OhmmsAttributeSet aAttrib; @@ -162,12 +161,14 @@ LCAOrbitalBuilder::LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Commun throw std::runtime_error("No basisset found in the XML input!"); } -LCAOrbitalBuilder::~LCAOrbitalBuilder() +template +LCAOrbitalBuilderT::~LCAOrbitalBuilderT() { - //properly cleanup + // properly cleanup } -int LCAOrbitalBuilder::determineRadialOrbType(xmlNodePtr cur) const +template +int LCAOrbitalBuilderT::determineRadialOrbType(xmlNodePtr cur) const { std::string keyOpt; std::string transformOpt; @@ -190,9 +191,12 @@ int LCAOrbitalBuilder::determineRadialOrbType(xmlNodePtr cur) const return radialOrbType; } -std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent) +template +std::unique_ptr::BasisSet_t> LCAOrbitalBuilderT::loadBasisSetFromXML( + xmlNodePtr cur, + xmlNodePtr parent) { - ReportEngine PRE(ClassName, "loadBasisSetFromXML(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "loadBasisSetFromXML(xmlNodePtr)"); int ylm = -1; { xmlNodePtr cur1 = cur->xmlChildrenNode; @@ -216,51 +220,55 @@ std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromXML(xmlNodePtr cu int radialOrbType = determineRadialOrbType(cur); if (radialOrbType < 0) { - app_warning() << "Radial orbital type cannot be determined based on the attributes of basisset line. " + app_warning() << "Radial orbital type cannot be determined based on " + "the attributes of basisset line. " << "Trying the parent element." << std::endl; radialOrbType = determineRadialOrbType(parent); } if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. Specify keyword=\"NMO/GTO/STO\" .", true); + PRE.error("Unknown radial function for LCAO orbitals. 
Specify " + "keyword=\"NMO/GTO/STO\" .", + true); BasisSet_t* myBasisSet = nullptr; /** process atomicBasisSet per ion species */ switch (radialOrbType) { - case (0): //numerical - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (0): // numerical + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSet<0, 1>(cur); else myBasisSet = createBasisSet<0, 0>(cur); break; - case (1): //gto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (1): // gto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSet<1, 1>(cur); else myBasisSet = createBasisSet<1, 0>(cur); break; - case (2): //sto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (2): // sto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; myBasisSet = createBasisSet<2, 1>(cur); break; default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); break; } return std::unique_ptr(myBasisSet); } -std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromH5(xmlNodePtr parent) +template +std::unique_ptr::BasisSet_t> LCAOrbitalBuilderT::loadBasisSetFromH5(xmlNodePtr parent) { - ReportEngine PRE(ClassName, "loadBasisSetFromH5()"); + ReportEngine PRE(this->ClassName, "loadBasisSetFromH5()"); - hdf_archive hin(myComm); + hdf_archive hin(this->myComm); int ylm = -1; - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) PRE.error("Could not open H5 file", true); @@ -273,65 +281,69 @@ std::unique_ptr LCAOrbitalBuilder::loadBasisSetFromH5(xmlNodePtr par hin.push(ElemID0.c_str(), false); if (!hin.readEntry(sph, "angular")) - PRE.error("Could not find name of basisset group in H5; Probably Corrupt H5 file", true); + PRE.error("Could not find name of basisset group in H5; Probably " + "Corrupt H5 file", + true); ylm = (sph == "cartesian") ? 
0 : 1; hin.close(); } - myComm->bcast(ylm); + this->myComm->bcast(ylm); if (ylm < 0) PRE.error("Missing angular attribute of atomicBasisSet.", true); int radialOrbType = determineRadialOrbType(parent); if (radialOrbType < 0) - PRE.error("Unknown radial function for LCAO orbitals. Specify keyword=\"NMO/GTO/STO\" .", true); + PRE.error("Unknown radial function for LCAO orbitals. Specify " + "keyword=\"NMO/GTO/STO\" .", + true); BasisSet_t* myBasisSet = nullptr; /** process atomicBasisSet per ion species */ switch (radialOrbType) { - case (0): //numerical - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (0): // numerical + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSetH5<0, 1>(); else myBasisSet = createBasisSetH5<0, 0>(); break; - case (1): //gto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (1): // gto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; if (ylm) myBasisSet = createBasisSetH5<1, 1>(); else myBasisSet = createBasisSetH5<1, 0>(); break; - case (2): //sto - app_log() << " LCAO: SoaAtomicBasisSet" << std::endl; + case (2): // sto + app_log() << " LCAO: SoaAtomicBasisSetT" << std::endl; myBasisSet = createBasisSetH5<2, 1>(); break; default: - PRE.error("Cannot construct SoaAtomicBasisSet.", true); + PRE.error("Cannot construct SoaAtomicBasisSetT.", true); break; } return std::unique_ptr(myBasisSet); } - +template template -LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSet(xmlNodePtr cur) +typename LCAOrbitalBuilderT::BasisSet_t* LCAOrbitalBuilderT::createBasisSet(xmlNodePtr cur) { - ReportEngine PRE(ClassName, "createBasisSet(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "createBasisSet(xmlNodePtr)"); - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; + using ao_type = typename ao_traits::ao_type; + using basis_type = typename ao_traits::basis_type; basis_type* mBasisSet = new basis_type(sourcePtcl, 
targetPtcl); - //list of built centers + // list of built centers std::vector ao_built_centers; /** process atomicBasisSet per ion species */ cur = cur->xmlChildrenNode; - while (cur != NULL) //loop over unique ioons + while (cur != NULL) // loop over unique ioons { std::string cname((const char*)(cur->name)); @@ -349,12 +361,12 @@ LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSet(xmlNodePtr cur) auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); if (it == ao_built_centers.end()) { - AOBasisBuilder any(elementType, myComm); + AOBasisBuilderT any(elementType, this->myComm); any.put(cur); auto aoBasis = any.createAOSet(cur); if (aoBasis) { - //add the new atomic basis to the basis set + // add the new atomic basis to the basis set int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); mBasisSet->add(activeCenter, std::move(aoBasis)); } @@ -368,18 +380,18 @@ LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSet(xmlNodePtr cur) return mBasisSet; } - +template template -LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() +typename LCAOrbitalBuilderT::BasisSet_t* LCAOrbitalBuilderT::createBasisSetH5() { - ReportEngine PRE(ClassName, "createBasisSetH5(xmlNodePtr)"); + ReportEngine PRE(this->ClassName, "createBasisSetH5(xmlNodePtr)"); - using ao_type = typename ao_traits::ao_type; - using basis_type = typename ao_traits::basis_type; + using ao_type = typename ao_traits::ao_type; + using basis_type = typename ao_traits::basis_type; basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl); - //list of built centers + // list of built centers std::vector ao_built_centers; int Nb_Elements(0); @@ -388,8 +400,8 @@ LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() /** process atomicBasisSet per ion species */ app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl; - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive 
hin(this->myComm); + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) PRE.error("Could not open H5 file", true); @@ -399,7 +411,7 @@ LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() hin.read(Nb_Elements, "NbElements"); } - myComm->bcast(Nb_Elements); + this->myComm->bcast(Nb_Elements); if (Nb_Elements < 1) PRE.error("Missing elementType attribute of atomicBasisSet.", true); @@ -411,38 +423,42 @@ LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() tempElem << ElemID0 << i; ElemType = tempElem.str(); - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) { hin.push(ElemType.c_str(), false); if (!hin.readEntry(basiset_name, "name")) - PRE.error("Could not find name of basisset group in H5; Probably Corrupt H5 file", true); + PRE.error("Could not find name of basisset group in H5; " + "Probably Corrupt H5 file", + true); if (!hin.readEntry(elementType, "elementType")) - PRE.error("Could not read elementType in H5; Probably Corrupt H5 file", true); + PRE.error("Could not read elementType in H5; Probably Corrupt " + "H5 file", + true); } - myComm->bcast(basiset_name); - myComm->bcast(elementType); + this->myComm->bcast(basiset_name); + this->myComm->bcast(elementType); auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType); if (it == ao_built_centers.end()) { - AOBasisBuilder any(elementType, myComm); + AOBasisBuilderT any(elementType, this->myComm); any.putH5(hin); auto aoBasis = any.createAOSetH5(hin); if (aoBasis) { - //add the new atomic basis to the basis set + // add the new atomic basis to the basis set int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType); mBasisSet->add(activeCenter, std::move(aoBasis)); } ao_built_centers.push_back(elementType); } - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) hin.pop(); } - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) { hin.pop(); hin.close(); @@ -451,11 +467,179 @@ 
LCAOrbitalBuilder::BasisSet_t* LCAOrbitalBuilder::createBasisSetH5() mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors, PeriodicImageDisplacements); return mBasisSet; } +#ifndef QMC_COMPLEX +template<> +std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection( + xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, + std::unique_ptr&& myBasisSet) +{ + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = + std::make_unique>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } +#ifndef MIXED_PRECISION + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. + ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info(num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) + { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo(cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); +#ifdef HAVE_MPI + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0); +#endif + } + else + { + CuspCorrectionConstructionT::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, + *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp(cusp_file, info, spo_name); + } + CuspCorrectionConstructionT::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, + spo_name); -std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) + return sposet; +} +#else +template<> +std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection( + xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, + std::unique_ptr&& myBasisSet) { - ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); + app_summary() << " Using cusp correction." << std::endl; + std::unique_ptr> sposet; + { + auto lcwc = + std::make_unique>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); + loadMO(lcwc->lcao, cur); + lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + sposet = std::move(lcwc); + } + + // Create a temporary particle set to use for cusp initialization. + // The particle coordinates left at the end are unsuitable for further + // computations. The coordinates get set to nuclear positions, which + // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h + // This problem only appears when the electron positions are specified + // in the input. 
The random particle placement step executes after this + // part of the code, overwriting the leftover positions from the cusp + // initialization. + ParticleSetT tmp_targetPtcl(targetPtcl); + + const int num_centers = sourcePtcl.getTotalNum(); + auto& lcwc = dynamic_cast&>(*sposet); + + const int orbital_set_size = lcwc.getOrbitalSetSize(); + Matrix> info(num_centers, orbital_set_size); + + // set a default file name if not given + if (cusp_file.empty()) + cusp_file = spo_name + ".cuspInfo.xml"; + + bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good()); + this->myComm->bcast(file_exists); + app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." : " doesn't exist.") << std::endl; + + // validate file if it exists + if (file_exists) + { + bool valid = 0; + if (this->myComm->rank() == 0) + valid = CuspCorrectionConstructionT::readCuspInfo(cusp_file, spo_name, orbital_set_size, info); + this->myComm->bcast(valid); + if (!valid) + this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); +#ifdef HAVE_MPI + for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) + for (int center_idx = 0; center_idx < num_centers; center_idx++) + CuspCorrectionConstructionT::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0); +#endif + } + else + { + CuspCorrectionConstructionT::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, + *this->myComm); + if (this->myComm->rank() == 0) + CuspCorrectionConstructionT::saveCusp(cusp_file, info, spo_name); + } + + CuspCorrectionConstructionT::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, + spo_name); + + return sposet; +} +#endif +#else +#ifndef MIXED_PRECISION +template<> +std::unique_ptr>> LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, + const std::string&, + std::string, + std::unique_ptr&&) +{ + this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + 
"supported on complex LCAO."); + return std::unique_ptr>>{}; +} +#else +template<> +std::unique_ptr>> LCAOrbitalBuilderT>::createWithCuspCorrection( + xmlNodePtr, + const std::string&, + std::string, + std::unique_ptr&&) +{ + this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not " + "supported on complex LCAO."); + return std::unique_ptr>>{}; +} +#endif +#endif + +template +std::unique_ptr> LCAOrbitalBuilderT::createSPOSetFromXML(xmlNodePtr cur) +{ + ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)"); std::string spo_name(""), cusp_file(""), optimize("no"); std::string basisset_name("LCAOBSet"); OhmmsAttributeSet spoAttrib; @@ -467,100 +651,39 @@ std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) std::unique_ptr myBasisSet; if (basisset_map_.find(basisset_name) == basisset_map_.end()) - myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); + this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); else myBasisSet.reset(basisset_map_[basisset_name]->makeClone()); - std::unique_ptr sposet; + std::unique_ptr> sposet; if (doCuspCorrection) { -#if defined(QMC_COMPLEX) - myComm->barrier_and_abort( - "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not supported on complex LCAO."); -#else - app_summary() << " Using cusp correction." 
<< std::endl; - auto lcwc = std::make_unique(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); - sposet = std::move(lcwc); -#endif + sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet)); } else { - auto lcos = std::make_unique(spo_name, std::move(myBasisSet)); + auto lcos = std::make_unique>(spo_name, std::move(myBasisSet)); loadMO(*lcos, cur); sposet = std::move(lcos); } -#if !defined(QMC_COMPLEX) - if (doCuspCorrection) - { - // Create a temporary particle set to use for cusp initialization. - // The particle coordinates left at the end are unsuitable for further computations. - // The coordinates get set to nuclear positions, which leads to zero e-N distance, - // which causes a NaN in SoaAtomicBasisSet.h - // This problem only appears when the electron positions are specified in the input. - // The random particle placement step executes after this part of the code, overwriting - // the leftover positions from the cusp initialization. - ParticleSet tmp_targetPtcl(targetPtcl); - - const int num_centers = sourcePtcl.getTotalNum(); - auto& lcwc = dynamic_cast(*sposet); - - const int orbital_set_size = lcwc.getOrbitalSetSize(); - Matrix info(num_centers, orbital_set_size); - - // set a default file name if not given - if (cusp_file.empty()) - cusp_file = spo_name + ".cuspInfo.xml"; - - bool file_exists(myComm->rank() == 0 && std::ifstream(cusp_file).good()); - myComm->bcast(file_exists); - app_log() << " Cusp correction file " << cusp_file << (file_exists ? " exits." 
: " doesn't exist.") << std::endl; - - // validate file if it exists - if (file_exists) - { - bool valid = 0; - if (myComm->rank() == 0) - valid = readCuspInfo(cusp_file, spo_name, orbital_set_size, info); - myComm->bcast(valid); - if (!valid) - myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file); -#ifdef HAVE_MPI - for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++) - for (int center_idx = 0; center_idx < num_centers; center_idx++) - broadcastCuspInfo(info(center_idx, orb_idx), *myComm, 0); -#endif - } - else - { - generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *myComm); - if (myComm->rank() == 0) - saveCusp(cusp_file, info, spo_name); - } - - applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name); - } -#endif - return sposet; } - /** Parse the xml file for information on the Dirac determinants. - *@param cur the current xmlNode - */ -bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) + *@param cur the current xmlNode + */ +template +bool LCAOrbitalBuilderT::loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur) { #undef FunctionName #define FunctionName \ printf("Calling FunctionName from %s\n", __FUNCTION__); \ FunctionNameReal - //Check if HDF5 present + // Check if HDF5 present ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)"); - //initialize the number of orbital by the basis set size + // initialize the number of orbital by the basis set size int norb = spo.getBasisSetSize(); std::string debugc("no"); double orbital_mix_magnitude = 0.0; @@ -598,12 +721,13 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) success = putFromXML(spo, coeff_ptr); else { - hdf_archive hin(myComm); + hdf_archive hin(this->myComm); - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or 
incorrect " + "path to H5 file."); try { @@ -621,7 +745,7 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) hin.close(); } - myComm->bcast(PBC); + this->myComm->bcast(PBC); if (PBC) success = putPBCFromH5(spo, coeff_ptr); else @@ -629,7 +753,7 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) } // Ye: used to construct cusp correction - //bool success2 = transformSPOSet(); + // bool success2 = transformSPOSet(); if (debugc == "yes") { app_log() << " Single-particle orbital coefficients dims=" << spo.C->rows() << " x " << spo.C->cols() @@ -640,7 +764,8 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) return success; } -bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) +template +bool LCAOrbitalBuilderT::putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { int norbs = 0; OhmmsAttributeSet aAttrib; @@ -654,12 +779,12 @@ bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) } if (norbs) { - std::vector Ctemp; + std::vector Ctemp; int BasisSetSize = spo.getBasisSetSize(); Ctemp.resize(norbs * BasisSetSize); putContent(Ctemp, coeff_ptr); int n = 0, i = 0; - std::vector::iterator cit(Ctemp.begin()); + typename std::vector::iterator cit(Ctemp.begin()); while (i < spo.getOrbitalSetSize()) { if (Occ[n] > std::numeric_limits::epsilon()) @@ -675,10 +800,11 @@ bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) } /** read data from a hdf5 file - * @param norb number of orbitals to be initialized - * @param coeff_ptr xmlnode for coefficients - */ -bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) + * @param norb number of orbitals to be initialized + * @param coeff_ptr xmlnode for coefficients + */ +template +bool LCAOrbitalBuilderT::putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { int neigs = spo.getBasisSetSize(); int setVal = -1; @@ -687,17 +813,17 @@ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, 
xmlNodePtr coeff_ptr) aAttrib.add(neigs, "size"); aAttrib.add(neigs, "orbitals"); aAttrib.put(coeff_ptr); - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " + "to H5 file."); Matrix Ctemp; std::array name; - - //This is to make sure of Backward compatibility with previous tags. + // This is to make sure of Backward compatibility with previous tags. int name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); if (name_len < 0) throw std::runtime_error("Error generating name"); @@ -717,7 +843,7 @@ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) std::ostringstream err_msg; err_msg << "Basis set size " << spo.getBasisSetSize() << " mismatched the number of MO coefficients columns " << Ctemp.cols() << " from h5." << std::endl; - myComm->barrier_and_abort(err_msg.str()); + this->myComm->barrier_and_abort(err_msg.str()); } int norbs = spo.getOrbitalSetSize(); @@ -726,7 +852,7 @@ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) std::ostringstream err_msg; err_msg << "Need " << norbs << " orbitals. Insufficient rows of MO coefficients " << Ctemp.rows() << " from h5." 
<< std::endl; - myComm->barrier_and_abort(err_msg.str()); + this->myComm->barrier_and_abort(err_msg.str()); } int n = 0, i = 0; @@ -740,16 +866,16 @@ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) n++; } } - myComm->bcast(spo.C->data(), spo.C->size()); + this->myComm->bcast(spo.C->data(), spo.C->size()); return true; } - /** read data from a hdf5 file - * @param norb number of orbitals to be initialized - * @param coeff_ptr xmlnode for coefficients - */ -bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) + * @param norb number of orbitals to be initialized + * @param coeff_ptr xmlnode for coefficients + */ +template +bool LCAOrbitalBuilderT::putPBCFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr) { ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5"); int norbs = spo.getOrbitalSetSize(); @@ -764,7 +890,7 @@ bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) aAttrib.add(neigs, "size"); aAttrib.add(neigs, "orbitals"); aAttrib.put(coeff_ptr); - hdf_archive hin(myComm); + hdf_archive hin(this->myComm); xmlNodePtr curtemp = coeff_ptr; @@ -781,24 +907,26 @@ bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) ss << curtemp->name; ss >> curname; if (curname == MSDTag) - MultiDet = true; ///Used to know if running an MSD calculation - needed for order of Orbitals. + MultiDet = true; /// Used to know if running an MSD calculation - + /// needed for order of Orbitals. if (curname == SDTag) MultiDet = false; } while ((xmlTag != curname) && (curname != EndTag) && curname != "sposet_collection"); if (curname == EndTag) { - APP_ABORT( - "Could not find in wf file the \"sposet\" or \"determinant\" tags. Please verify input or contact developers"); + APP_ABORT("Could not find in wf file the \"sposet\" or \"determinant\" " + "tags. 
Please verify input or contact developers"); } aAttrib.add(SuperTwist, "twist"); aAttrib.put(curtemp); - if (myComm->rank() == 0) + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) - APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path to H5 file."); + APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path " + "to H5 file."); hin.push("parameters"); hin.read(IsComplex, "IsComplex"); hin.pop(); @@ -814,10 +942,11 @@ bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) app_log() << "Diff in Coord x :" << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl; app_log() << " y :" << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl; app_log() << " z :" << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl; - APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do not Match!!! Aborting."); + APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do " + "not Match!!! Aborting."); } - //SuperTwist=SuperTwistH5; - Matrix Ctemp; + // SuperTwist=SuperTwistH5; + Matrix Ctemp; LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet); int n = 0, i = 0; @@ -834,15 +963,15 @@ bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) hin.close(); } #ifdef HAVE_MPI - myComm->comm.broadcast_n(spo.C->data(), spo.C->size()); + this->myComm->comm.broadcast_n(spo.C->data(), spo.C->size()); #endif return true; } - -bool LCAOrbitalBuilder::putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr) +template +bool LCAOrbitalBuilderT::putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr) { - //die?? + // die?? 
if (spo.getBasisSetSize() == 0) { APP_ABORT("LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize"); @@ -864,13 +993,13 @@ bool LCAOrbitalBuilder::putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr) if (!o.empty()) occ_mode = o; } - //Do nothing if mode == ground + // Do nothing if mode == ground if (occ_mode == "excited") { putContent(occ_in, occ_ptr); for (int k = 0; k < occ_in.size(); k++) { - if (occ_in[k] < 0) //remove this, -1 is to adjust the base + if (occ_in[k] < 0) // remove this, -1 is to adjust the base Occ[-occ_in[k] - 1] = 0.0; else Occ[occ_in[k] - 1] = 1.0; @@ -883,30 +1012,36 @@ bool LCAOrbitalBuilder::putOccupation(LCAOrbitalSet& spo, xmlNodePtr occ_ptr) return true; } -void LCAOrbitalBuilder::readRealMatrixFromH5(hdf_archive& hin, - const std::string& setname, - Matrix& Creal) const +template +void LCAOrbitalBuilderT::readRealMatrixFromH5(hdf_archive& hin, + const std::string& setname, + Matrix& Creal) const { hin.read(Creal, setname); } -void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix>& Ctemp, - bool MultiDet) +template +void LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix>& Ctemp, + bool MultiDet) { Matrix Creal; Matrix Ccmplx; std::array name; int name_len{0}; - ///When running Single Determinant calculations, MO coeff loaded based on occupation and lowest eingenvalue. - ///However, for solids with multideterminants, orbitals are order by kpoints; first all MOs for kpoint 1, then 2 etc - /// The multideterminants occupation is specified in the input/HDF5 and theefore as long as there is consistency between - /// the order in which we read the orbitals and the occupation, we are safe. In the case of Multideterminants generated - /// by pyscf and Quantum Package, They are stored in the same order as generated for quantum package and one should use - /// the orbitals labelled eigenset_unsorted. 
+ /// When running Single Determinant calculations, MO coeff loaded based on + /// occupation and lowest eingenvalue. However, for solids with + /// multideterminants, orbitals are order by kpoints; first all MOs for + /// kpoint 1, then 2 etc + /// The multideterminants occupation is specified in the input/HDF5 and + /// theefore as long as there is consistency between the order in which we + /// read the orbitals and the occupation, we are safe. In the case of + /// Multideterminants generated by pyscf and Quantum Package, They are + /// stored in the same order as generated for quantum package and one + /// should use the orbitals labelled eigenset_unsorted. if (MultiDet == false) name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal); @@ -937,18 +1072,20 @@ void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, Ctemp[i][j] = std::complex(Creal[i][j], Ccmplx[i][j]); } -void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, - int setVal, - PosType& SuperTwist, - Matrix& Creal, - bool MultiDet) +template +void LCAOrbitalBuilderT::LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix& Creal, + bool MultiDet) { bool IsComplex = false; hin.read(IsComplex, "/parameters/IsComplex"); if (IsComplex && (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 || std::abs(SuperTwist[2]) >= 1e-6)) { - std::string setname("This Wavefunction is Complex and you are using the real version of QMCPACK. " + std::string setname("This Wavefunction is Complex and you are using the real version " + "of QMCPACK. 
" "Please re-run this job with the Complex build of QMCPACK."); APP_ABORT(setname.c_str()); } @@ -968,7 +1105,8 @@ void LCAOrbitalBuilder::LoadFullCoefsFromH5(hdf_archive& hin, } /// Periodic Image Phase Factors computation to be determined -void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( +template +void LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors( PosType SuperTwist, Vector>& LocPeriodicImagePhaseFactors, Array>& LocPeriodicImageDisplacements) @@ -980,8 +1118,8 @@ void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( ///Exp(ik.g) where i is imaginary, k is the supertwist and g is the translation vector PBCImage. if (h5_path != "" && !usesOpenBC) { - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) APP_ABORT("Could not open H5 file"); @@ -993,7 +1131,7 @@ void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( } for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) - myComm->bcast(Lattice(i, j)); + this->myComm->bcast(Lattice(i, j)); } else if (!usesOpenBC) { @@ -1021,20 +1159,23 @@ void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( } } -void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( +template +void LCAOrbitalBuilderT::EvalPeriodicImagePhaseFactors( PosType SuperTwist, Vector, OffloadPinnedAllocator>>& LocPeriodicImagePhaseFactors, Array>& LocPeriodicImageDisplacements) { - // Allow computation to continue with no HDF file if the system has open boundary conditions. - // The complex build is usually only used with open BC for testing. + // Allow computation to continue with no HDF file if the system has open + // boundary conditions. The complex build is usually only used with open BC + // for testing. bool usesOpenBC = PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0; - ///Exp(ik.g) where i is imaginary, k is the supertwist and g is the translation vector PBCImage. 
+ /// Exp(ik.g) where i is imaginary, k is the supertwist and g is the + /// translation vector PBCImage. if (h5_path != "" && !usesOpenBC) { - hdf_archive hin(myComm); - if (myComm->rank() == 0) + hdf_archive hin(this->myComm); + if (this->myComm->rank() == 0) { if (!hin.open(h5_path, H5F_ACC_RDONLY)) APP_ABORT("Could not open H5 file"); @@ -1046,11 +1187,12 @@ void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( } for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) - myComm->bcast(Lattice(i, j)); + this->myComm->bcast(Lattice(i, j)); } else if (!usesOpenBC) { - APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour is unknown. Safer to exit"); + APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour " + "is unknown. Safer to exit"); } int phase_idx = 0; @@ -1084,4 +1226,18 @@ void LCAOrbitalBuilder::EvalPeriodicImagePhaseFactors( LocPeriodicImagePhaseFactors[i] = std::complex(c, s); } } + +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class LCAOrbitalBuilderT; +#else +template class LCAOrbitalBuilderT; +#endif +#else +#ifndef MIXED_PRECISION +template class LCAOrbitalBuilderT>; +#else +template class LCAOrbitalBuilderT>; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h new file mode 100644 index 0000000000..0704064c1e --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h @@ -0,0 +1,131 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H +#define QMCPLUSPLUS_SOA_LCAO_ORBITAL_BUILDERT_H + +#include "QMCWaveFunctions/LCAO/LCAOrbitalSetT.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" + +#include + +namespace qmcplusplus +{ +/** SPOSetBuilder using new LCAOrbitalSet and Soa versions + * + * Reimplement MolecularSPOSetBuilder + * - support both CartesianTensor and SphericalTensor + */ +template +class LCAOrbitalBuilderT : public SPOSetBuilderT +{ +public: + using BasisSet_t = typename LCAOrbitalSetT::basis_type; + using RealType = typename LCAOrbitalSetT::RealType; + using ValueType = typename LCAOrbitalSetT::ValueType; + using PosType = typename LCAOrbitalSetT::PosType; + + /** constructor + * \param els reference to the electrons + * \param ions reference to the ions + */ + LCAOrbitalBuilderT(ParticleSetT& els, ParticleSetT& ions, Communicate* comm, xmlNodePtr cur); + ~LCAOrbitalBuilderT() override; + std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) override; + +protected: + /// target ParticleSet + ParticleSetT& targetPtcl; + /// source ParticleSet + ParticleSetT& sourcePtcl; + /// localized basis set map + std::map> basisset_map_; + /// if true, add cusp correction to orbitals + bool cuspCorr; + /// Path to HDF5 Wavefunction + std::string h5_path; + /// Number of periodic Images for Orbital evaluation + TinyVector PBCImages; + /// Coordinates Super Twist + PosType SuperTwist; + ///Periodic Image Phase Factors. Correspond to the phase from the PBCImages. Computed only once. 
+ Vector> PeriodicImagePhaseFactors; + Array> PeriodicImageDisplacements; + /// Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors + Tensor Lattice; + + /// Enable cusp correction + bool doCuspCorrection; + + /** create basis set + * + * Use ao_traits to match (ROT)x(SH) combo + */ + template + BasisSet_t* createBasisSet(xmlNodePtr cur); + template + BasisSet_t* createBasisSetH5(); + + // The following items were previously in SPOSet + /// occupation number + Vector Occ; + bool loadMO(LCAOrbitalSetT& spo, xmlNodePtr cur); + bool putOccupation(LCAOrbitalSetT& spo, xmlNodePtr occ_ptr); + bool putFromXML(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + bool putFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + bool putPBCFromH5(LCAOrbitalSetT& spo, xmlNodePtr coeff_ptr); + // the dimensions of Ctemp are determined by the dataset on file + void LoadFullCoefsFromH5(hdf_archive& hin, + int setVal, + PosType& SuperTwist, + Matrix>& Ctemp, + bool MultiDet); + // the dimensions of Creal are determined by the dataset on file + void LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, Matrix& Creal, bool Multidet); + void EvalPeriodicImagePhaseFactors( + PosType SuperTwist, + Vector>& LocPeriodicImagePhaseFactors, + Array>& LocPeriodicImageDisplacements); + void EvalPeriodicImagePhaseFactors( + PosType SuperTwist, + Vector, OffloadPinnedAllocator>>& LocPeriodicImagePhaseFactors, + Array>& LocPeriodicImageDisplacements); + /** read matrix from h5 file + * \param[in] hin: hdf5 arhive to be read from + * \param setname: where to read from in hdf5 archive + * \param[out] Creal: matrix read from h5 + * + * added in header to allow use from derived class LCAOSpinorBuilder as well + */ + void readRealMatrixFromH5(hdf_archive& hin, const std::string& setname, Matrix& Creal) const; + +private: + /// enable cusp correction + std::unique_ptr> createWithCuspCorrection(xmlNodePtr cur, + const std::string& spo_name, + std::string cusp_file, + 
std::unique_ptr&& myBasisSet); + /// load a basis set from XML input + std::unique_ptr loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent); + /// load a basis set from h5 file + std::unique_ptr loadBasisSetFromH5(xmlNodePtr parent); + /// determine radial orbital type based on "keyword" and "transform" + /// attributes + int determineRadialOrbType(xmlNodePtr cur) const; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp deleted file mode 100644 index 026dc272ba..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp +++ /dev/null @@ -1,991 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. 
-////////////////////////////////////////////////////////////////////////////////////// - - -#include "LCAOrbitalSet.h" -#include "Numerics/MatrixOperators.h" -#include "CPU/BLAS.hpp" -#include - -namespace qmcplusplus -{ - -struct LCAOrbitalSet::LCAOMultiWalkerMem : public Resource -{ - LCAOMultiWalkerMem() : Resource("LCAOrbitalSet") {} - LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} - - std::unique_ptr makeClone() const override { return std::make_unique(*this); } - - OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] - OffloadMWVGLArray basis_vgl_mw; // [5][NW][NumAO] - OffloadMWVArray phi_v; // [NW][NumMO] - OffloadMWVArray basis_v_mw; // [NW][NumAO] - OffloadMWVArray vp_phi_v; // [NVPs][NumMO] - OffloadMWVArray vp_basis_v_mw; // [NVPs][NumAO] -}; - -LCAOrbitalSet::LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs) - : SPOSet(my_name), - BasisSetSize(bs ? bs->getBasisSetSize() : 0), - Identity(true), - basis_timer_(createGlobalTimer("LCAOrbitalSet::Basis", timer_level_fine)), - mo_timer_(createGlobalTimer("LCAOrbitalSet::MO", timer_level_fine)) -{ - if (!bs) - throw std::runtime_error("LCAOrbitalSet cannot take nullptr as its basis set!"); - myBasisSet = std::move(bs); - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - OrbitalSetSize = BasisSetSize; - LCAOrbitalSet::checkObject(); -} - -LCAOrbitalSet::LCAOrbitalSet(const LCAOrbitalSet& in) - : SPOSet(in), - myBasisSet(in.myBasisSet->makeClone()), - C(in.C), - BasisSetSize(in.BasisSetSize), - C_copy(in.C_copy), - Identity(in.Identity), - basis_timer_(in.basis_timer_), - mo_timer_(in.mo_timer_) -{ - Temp.resize(BasisSetSize); - Temph.resize(BasisSetSize); - Tempgh.resize(BasisSetSize); - if (!in.Identity) - { - Tempv.resize(OrbitalSetSize); - Temphv.resize(OrbitalSetSize); - Tempghv.resize(OrbitalSetSize); - } - LCAOrbitalSet::checkObject(); -} - -void LCAOrbitalSet::setOrbitalSetSize(int norbs) -{ - if (C) - throw 
std::runtime_error("LCAOrbitalSet::setOrbitalSetSize cannot reset existing MO coefficients"); - - Identity = false; - OrbitalSetSize = norbs; - C = std::make_shared(OrbitalSetSize, BasisSetSize); - Tempv.resize(OrbitalSetSize); - Temphv.resize(OrbitalSetSize); - Tempghv.resize(OrbitalSetSize); - LCAOrbitalSet::checkObject(); -} - -void LCAOrbitalSet::checkObject() const -{ - if (Identity) - { - if (OrbitalSetSize != BasisSetSize) - throw std::runtime_error( - "LCAOrbitalSet::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!"); - if (C) - throw std::runtime_error("LCAOrbitalSet::checkObject C should be nullptr if Identity = true!"); - } - else - { - if (!C) - throw std::runtime_error("LCAOrbitalSet::checkObject C should not be nullptr if Identity = false!"); - if (OrbitalSetSize != C->rows()) - throw std::runtime_error("LCAOrbitalSet::checkObject C rows doesn't match OrbitalSetSize."); - if (BasisSetSize != C->cols()) - throw std::runtime_error("LCAOrbitalSet::checkObject C columns doesn't match BasisSetSize."); - } -} - -void LCAOrbitalSet::createResource(ResourceCollection& collection) const -{ - myBasisSet->createResource(collection); - - auto resource_index = collection.addResource(std::make_unique()); -} - -void LCAOrbitalSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - - spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); - - spo_leader.mw_mem_handle_ = collection.lendResource(); -} - -void LCAOrbitalSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - - spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); - - collection.takebackResource(spo_leader.mw_mem_handle_); -} - -RefVectorWithLeader 
LCAOrbitalSet::extractBasisRefList( - const RefVectorWithLeader& spo_list) const -{ - RefVectorWithLeader basis_list(*spo_list.getCastedLeader().myBasisSet); - basis_list.reserve(spo_list.size()); - for (size_t iw = 0; iw < spo_list.size(); iw++) - basis_list.push_back(*spo_list.getCastedElement(iw).myBasisSet); - return basis_list; -} -std::unique_ptr LCAOrbitalSet::makeClone() const { return std::make_unique(*this); } - -void LCAOrbitalSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - if (Identity) - { //PAY ATTENTION TO COMPLEX - myBasisSet->evaluateV(P, iat, psi.data()); - } - else - { - Vector vTemp(Temp.data(0), BasisSetSize); - myBasisSet->evaluateV(P, iat, vTemp.data()); - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - MatrixOperators::product(C_partial_view, vTemp, psi); - } -} - -/** Find a better place for other user classes, Matrix should be padded as well */ -template -inline void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) -{ - constexpr char transa = 't'; - constexpr char transb = 'n'; - constexpr T zone(1); - constexpr T zero(0); - BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), - C.capacity()); -} - -inline void LCAOrbitalSet::evaluate_vgl_impl(const vgl_type& temp, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2psi.data()); -} - -inline void LCAOrbitalSet::evaluate_vgh_impl(const vgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& 
d2psi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_vghgh_impl(const vghgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi, - GGGMatrix& dghpsi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xyz = temp.data(14); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const 
ValueType* restrict gh_yzz = temp.data(18); - const ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - - dghpsi[i][j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[i][j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[i][j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[i][j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[i][j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[i][j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[i][j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[i][j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[i][j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[i][j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - dghpsi[i][j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[i][j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[i][j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[i][j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[i][j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[i][j][1](2, 0) = gh_xyz[j]; //y|zx = xyz - dghpsi[i][j][1](2, 1) = gh_yyz[j]; //y|zy = yyz - dghpsi[i][j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[i][j][2](0, 0) = gh_xxz[j]; //z|xx = xxz - dghpsi[i][j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[i][j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[i][j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[i][j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[i][j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[i][j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[i][j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[i][j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -inline void LCAOrbitalSet::evaluate_vghgh_impl(const vghgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi, - GGGVector& dghpsi) const -{ - const size_t output_size = psi.size(); - std::copy_n(temp.data(0), 
output_size, psi.data()); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xyz = temp.data(14); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const ValueType* restrict gh_yzz = temp.data(18); - const ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[j][0] = gx[j]; - dpsi[j][1] = gy[j]; - dpsi[j][2] = gz[j]; - - d2psi[j](0, 0) = hxx[j]; - d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; - d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; - d2psi[j](1, 1) = hyy[j]; - d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; - d2psi[j](2, 2) = hzz[j]; - - dghpsi[j][0](0, 0) = gh_xxx[j]; //x|xx - dghpsi[j][0](0, 1) = gh_xxy[j]; //x|xy - dghpsi[j][0](0, 2) = gh_xxz[j]; //x|xz - dghpsi[j][0](1, 0) = gh_xxy[j]; //x|yx = xxy - dghpsi[j][0](1, 1) = gh_xyy[j]; //x|yy - dghpsi[j][0](1, 2) = gh_xyz[j]; //x|yz - dghpsi[j][0](2, 0) = gh_xxz[j]; //x|zx = xxz - dghpsi[j][0](2, 1) = gh_xyz[j]; //x|zy = xyz - dghpsi[j][0](2, 2) = gh_xzz[j]; //x|zz - - dghpsi[j][1](0, 0) = gh_xxy[j]; //y|xx = xxy - dghpsi[j][1](0, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](0, 2) = gh_xyz[j]; //y|xz = xyz - dghpsi[j][1](1, 0) = gh_xyy[j]; //y|yx = xyy - dghpsi[j][1](1, 1) = gh_yyy[j]; //y|yy - dghpsi[j][1](1, 2) = gh_yyz[j]; //y|yz - dghpsi[j][1](2, 0) = gh_xyz[j]; 
//y|zx = xyz - dghpsi[j][1](2, 1) = gh_xyy[j]; //y|xy = xyy - dghpsi[j][1](2, 2) = gh_yzz[j]; //y|zz - - dghpsi[j][2](0, 0) = gh_xzz[j]; //z|xx = xzz - dghpsi[j][2](0, 1) = gh_xyz[j]; //z|xy = xyz - dghpsi[j][2](0, 2) = gh_xzz[j]; //z|xz = xzz - dghpsi[j][2](1, 0) = gh_xyz[j]; //z|yx = xyz - dghpsi[j][2](1, 1) = gh_yyz[j]; //z|yy = yyz - dghpsi[j][2](1, 2) = gh_yzz[j]; //z|yz = yzz - dghpsi[j][2](2, 0) = gh_xzz[j]; //z|zx = xzz - dghpsi[j][2](2, 1) = gh_yzz[j]; //z|zy = yzz - dghpsi[j][2](2, 2) = gh_zzz[j]; //z|zz - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const -{ - const size_t output_size = dpsi.size(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. 
- dpsi[j][0] = -gx[j]; - dpsi[j][1] = -gy[j]; - dpsi[j][2] = -gz[j]; - } -} - - -void LCAOrbitalSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - //TAKE CARE OF IDENTITY - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateVGL(P, iat, Temp); - } - - if (Identity) - evaluate_vgl_impl(Temp, psi, dpsi, d2psi); - else - { - assert(psi.size() <= OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temp, C_partial_view, Tempv); - } - evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); - } -} - -void LCAOrbitalSet::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; - - phi_vgl_v.resize(DIM_VGL, spo_list.size(), OrbitalSetSize); - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - - const size_t nw = phi_vgl_v.size(1); - - //TODO: make this cleaner? 
- for (int iw = 0; iw < nw; iw++) - { - const size_t output_size = psi_v_list[iw].get().size(); - std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); - std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); - // grads are [dim, walker, orb] in phi_vgl_v - // [walker][orb, dim] in dpsi_v_list - for (size_t idim = 0; idim < DIM; idim++) - BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], DIM); - } -} - -void LCAOrbitalSet::mw_evaluateVGLImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& phi_vgl_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& basis_vgl_mw = spo_leader.mw_mem_handle_.getResource().basis_vgl_mw; - basis_vgl_mw.resize(DIM_VGL, spo_list.size(), BasisSetSize); - - { - ScopedTimer local(basis_timer_); - auto basis_list = spo_leader.extractBasisRefList(spo_list); - myBasisSet->mw_evaluateVGL(basis_list, P_list, iat, basis_vgl_mw); - basis_vgl_mw.updateFrom(); // TODO: remove this when gemm is implemented - } - - if (Identity) - { - // output_size can be smaller than BasisSetSize - const size_t output_size = phi_vgl_v.size(2); - const size_t nw = phi_vgl_v.size(1); - - for (size_t idim = 0; idim < DIM_VGL; idim++) - for (int iw = 0; iw < nw; iw++) - std::copy_n(basis_vgl_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); - } - else - { - const size_t requested_orb_size = phi_vgl_v.size(2); - assert(requested_orb_size <= OrbitalSetSize); - { - ScopedTimer local(mo_timer_); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - // TODO: make class for general blas interface in Platforms - // have instance of that class as member of LCAOrbitalSet, call gemm through that - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size() * DIM_VGL, // walkers * DIM_VGL - 
BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_vgl_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), - requested_orb_size); - } - } -} - -void LCAOrbitalSet::mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - OffloadMWVArray& vp_phi_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - //const size_t nw = spo_list.size(); - auto& vp_basis_v_mw = spo_leader.mw_mem_handle_.getResource().vp_basis_v_mw; - //Splatter basis_v - const size_t nVPs = vp_phi_v.size(0); - vp_basis_v_mw.resize(nVPs, BasisSetSize); - - auto basis_list = spo_leader.extractBasisRefList(spo_list); - myBasisSet->mw_evaluateValueVPs(basis_list, vp_list, vp_basis_v_mw); - vp_basis_v_mw.updateFrom(); // TODO: remove this when gemm is implemented - - if (Identity) - { - std::copy_n(vp_basis_v_mw.data_at(0, 0), OrbitalSetSize * nVPs, vp_phi_v.data_at(0, 0)); - } - else - { - const size_t requested_orb_size = vp_phi_v.size(1); - assert(requested_orb_size <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - nVPs, // walkers * Virtual Particles - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, vp_basis_v_mw.data(), BasisSetSize, 0, vp_phi_v.data(), - requested_orb_size); - } -} -void LCAOrbitalSet::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; - phi_v.resize(spo_list.size(), OrbitalSetSize); - mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); - - const size_t output_size = phi_v.size(1); - const size_t nw = phi_v.size(0); - - for (int iw = 0; iw < nw; iw++) - std::copy_n(phi_v.data_at(iw, 0), output_size, 
psi_v_list[iw].get().data()); -} - -void LCAOrbitalSet::mw_evaluateValueImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& phi_v) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - const size_t nw = spo_list.size(); - auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; - basis_v_mw.resize(nw, BasisSetSize); - - auto basis_list = spo_leader.extractBasisRefList(spo_list); - myBasisSet->mw_evaluateValue(basis_list, P_list, iat, basis_v_mw); - basis_v_mw.updateFrom(); // TODO: remove this when gemm is implemented - - if (Identity) - { - std::copy_n(basis_v_mw.data_at(0, 0), OrbitalSetSize * nw, phi_v.data_at(0, 0)); - } - else - { - const size_t requested_orb_size = phi_v.size(1); - assert(requested_orb_size <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); - BLAS::gemm('T', 'N', - requested_orb_size, // MOs - spo_list.size(), // walkers - BasisSetSize, // AOs - 1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), - requested_orb_size); - } -} - -void LCAOrbitalSet::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - auto& spo_leader = spo_list.getCastedLeader(); - auto& vp_phi_v = spo_leader.mw_mem_handle_.getResource().vp_phi_v; - - const size_t nVPs = VirtualParticleSet::countVPs(vp_list); - const size_t requested_orb_size = psi_list[0].get().size(); - vp_phi_v.resize(nVPs, requested_orb_size); - - mw_evaluateValueVPsImplGEMM(spo_list, vp_list, vp_phi_v); - - size_t index = 0; - for (size_t iw = 0; iw < vp_list.size(); iw++) - for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) - ratios_list[iw][iat] = simd::dot(vp_phi_v.data_at(index++, 0), 
invRow_ptr_list[iw], requested_orb_size); -} - -void LCAOrbitalSet::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - Vector vTemp(Temp.data(0), BasisSetSize); - Vector invTemp(Temp.data(1), BasisSetSize); - - if (Identity) - std::copy_n(psiinv.data(), psiinv.size(), invTemp.data()); - else - { - ScopedTimer local(mo_timer_); - // when only a subset of orbitals is used, extract limited rows of C. - Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); - MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); - } - - for (size_t j = 0; j < VP.getTotalNum(); j++) - { - { - ScopedTimer local(basis_timer_); - myBasisSet->evaluateV(VP, j, vTemp.data()); - } - ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); - } -} - -void LCAOrbitalSet::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - - mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); - // Device data of phi_vgl_v must be up-to-date upon return - phi_vgl_v.updateTo(); - - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - for (int iw = 0; iw < nw; iw++) - { - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); - GradType dphi; - for (size_t idim = 0; idim < DIM; idim++) - dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; - grads[iw] = dphi; - } -} - -void LCAOrbitalSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi) -{ - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGH(P, iat, Temph); - if (Identity) - 
evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); - else - { - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); - } -} - -void LCAOrbitalSet::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& dhpsi, - GGGVector& dghpsi) -{ - // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not implemented\n"); - - //TAKE CARE OF IDENTITY - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - if (Identity) - evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); - else - { - assert(psi.size() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); - } -} - -/* implement using gemm algorithm */ -inline void LCAOrbitalSet::evaluate_vgl_impl(const vgl_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) const -{ - const size_t output_size = logdet.cols(); - std::copy_n(temp.data(0), output_size, logdet[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - for (size_t j = 0; j < output_size; j++) - { - dlogdet[i][j][0] = gx[j]; - dlogdet[i][j][1] = gy[j]; - dlogdet[i][j][2] = gz[j]; - } - std::copy_n(temp.data(4), output_size, d2logdet[i]); -} - -inline void LCAOrbitalSet::evaluate_vgh_impl(const vgh_type& temp, - int i, - ValueMatrix& psi, - GradMatrix& dpsi, - HessMatrix& d2psi) const -{ - const size_t output_size = psi.cols(); - std::copy_n(temp.data(0), output_size, psi[i]); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* 
restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - - for (size_t j = 0; j < output_size; j++) - { - dpsi[i][j][0] = gx[j]; - dpsi[i][j][1] = gy[j]; - dpsi[i][j][2] = gz[j]; - - d2psi[i][j](0, 0) = hxx[j]; - d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; - d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; - d2psi[i][j](1, 1) = hyy[j]; - d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; - d2psi[i][j](2, 2) = hzz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const -{ - const size_t output_size = dpsi.cols(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. 
- dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - } -} - -inline void LCAOrbitalSet::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, - int i, - GradMatrix& dpsi, - HessMatrix& dgpsi, - GradMatrix& dlpsi) const -{ - const size_t output_size = dpsi.cols(); - const ValueType* restrict gx = temp.data(1); - const ValueType* restrict gy = temp.data(2); - const ValueType* restrict gz = temp.data(3); - const ValueType* restrict hxx = temp.data(4); - const ValueType* restrict hxy = temp.data(5); - const ValueType* restrict hxz = temp.data(6); - const ValueType* restrict hyy = temp.data(7); - const ValueType* restrict hyz = temp.data(8); - const ValueType* restrict hzz = temp.data(9); - const ValueType* restrict gh_xxx = temp.data(10); - const ValueType* restrict gh_xxy = temp.data(11); - const ValueType* restrict gh_xxz = temp.data(12); - const ValueType* restrict gh_xyy = temp.data(13); - const ValueType* restrict gh_xzz = temp.data(15); - const ValueType* restrict gh_yyy = temp.data(16); - const ValueType* restrict gh_yyz = temp.data(17); - const ValueType* restrict gh_yzz = temp.data(18); - const ValueType* restrict gh_zzz = temp.data(19); - - for (size_t j = 0; j < output_size; j++) - { - //As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property that - // for an atomic center, the ion gradient is the negative of the elecron gradient. - // Hence minus signs for each of these. - dpsi[i][j][0] = -gx[j]; - dpsi[i][j][1] = -gy[j]; - dpsi[i][j][2] = -gz[j]; - - dgpsi[i][j](0, 0) = -hxx[j]; - dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; - dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; - dgpsi[i][j](1, 1) = -hyy[j]; - dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; - dgpsi[i][j](2, 2) = -hzz[j]; - - //Since this returns the ion gradient of the laplacian, we have to trace the grad hessian vector. 
- dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); - dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); - dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGL(P, iat, Temp); - Product_ABt(Temp, C_partial_view, Tempv); - evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); - } - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGH(P, iat, Temph); - Product_ABt(Temph, C_partial_view, Temphv); - evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, 
grad_grad_grad_logdet); - } - } - else - { - assert(logdet.cols() <= OrbitalSetSize); - ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateVGHGH(P, iat, Tempgh); - Product_ABt(Tempgh, C_partial_view, Tempghv); - evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp); - evaluate_ionderiv_v_impl(Temp, i, gradphi); - } - } - else - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, Temp); - Product_ABt(Temp, *C, Tempv); - evaluate_ionderiv_v_impl(Tempv, i, gradphi); - } - } -} - -void LCAOrbitalSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - if (Identity) - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh); - evaluate_ionderiv_vgl_impl(Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); - } - } - else - { - for (size_t i = 0, iat = first; iat < last; i++, iat++) - { - myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, Tempgh); - Product_ABt(Tempgh, *C, Tempghv); - evaluate_ionderiv_vgl_impl(Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); - // evaluate_vghgh_impl(Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - } -} - -void LCAOrbitalSet::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) -{ - if (Identity) - { 
- myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp); - evaluate_ionderiv_v_row_impl(Temp, gradphi); - } - else - { - myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, Temp); - Product_ABt(Temp, *C, Tempv); - evaluate_ionderiv_v_row_impl(Tempv, gradphi); - } -} - -void LCAOrbitalSet::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (!use_stored_copy) - *C_copy = *C; - //gemm is out-of-place - BLAS::gemm('N', 'T', BasisSetSize, OrbitalSetSize, OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize, - rot_mat.data(), OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); - - /* debugging code - app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << std::endl; - for (int j = 0; j < OrbitalSetSize; j++) - for (int i = 0; i < BasisSetSize; i++) - { - app_log() << " " << std::right << std::fixed << std::setprecision(16) << std::setw(23) << std::scientific - << *(C->data() + j * BasisSetSize + i); - - if ((j * BasisSetSize + i + 1) % 4 == 0) - app_log() << std::endl; - } - */ -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h index cf6706df95..811105330d 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h @@ -13,310 +13,13 @@ #ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_TEMP_H -#include -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/BasisSetBase.h" - -#include "Numerics/MatrixOperators.h" -#include "Numerics/DeterminantOperators.h" +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" namespace qmcplusplus { -/** class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants. 
- * - * SoA verson of LCOrtbitalSet - * Localized basis set is always real - */ -struct LCAOrbitalSet : public SPOSet -{ -public: - using basis_type = SoaBasisSetBase; - using vgl_type = basis_type::vgl_type; - using vgh_type = basis_type::vgh_type; - using vghgh_type = basis_type::vghgh_type; - - ///pointer to the basis set - std::unique_ptr myBasisSet; - /// pointer to matrix containing the coefficients - std::shared_ptr C; - - /** constructor - * @param bs pointer to the BasisSet - */ - LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs); - - LCAOrbitalSet(const LCAOrbitalSet& in); - - std::string getClassName() const final { return "LCAOrbitalSet"; } - - bool isRotationSupported() const final { return true; } - - bool hasIonDerivs() const final { return true; } - - std::unique_ptr makeClone() const final; - - void storeParamsBeforeRotation() final { C_copy = std::make_shared(*C); } - - void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; - - /** set the OrbitalSetSize and Identity=false and initialize internal storages - */ - void setOrbitalSetSize(int norbs) final; - - /** return the size of the basis set - */ - int getBasisSetSize() const { return (myBasisSet == nullptr) ? 
0 : myBasisSet->getBasisSetSize(); } - - bool isIdentity() const { return Identity; }; - - /** check consistency between Identity and C - * - */ - void checkObject() const final; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const final; - - void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const final; - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const final; - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) final; - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const final; - - void evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) final; - - void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) final; - - void evaluate_notranspose(const 
ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) final; - - //NOTE: The data types get complicated here, so here's an overview of the - // data types associated with ionic derivatives, and how to get their data. - // - //NOTE: These data structures hold the data for one particular ion, and so the ID is implicit. - // It's up to the user to keep track of which ion these derivatives refer to. - // - // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for each electron. - // Example: grad_phi[iel][iorb][idim]. iel -- electron index. - // iorb -- orbital index. - // idim -- cartesian index of ionic derivative. - // X=0, Y=1, Z=2. - // - // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the electron gradient components - // for each SPO and each electron. - // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- electron index. - // iorb -- orbital index. - // idim -- ionic derivative's cartesian index. - // X=0, Y=1, Z=2 - // edim -- electron derivative's cartesian index. - // x=0, y=1, z=2. - // - // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the electron laplacian for each SPO and each electron. - // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron index. - // iorb -- orbital index. - // idim -- cartesian index of ionic derivative. - // X=0, Y=1, Z=2. - - /** - * \brief Calculate ion derivatives of SPO's. - * - * @param P Electron particle set. - * @param first index of first electron - * @@param last index of last electron - * @param source Ion particle set. - * @param iat_src Index of ion. - * @param gradphi Container storing ion gradients for all particles and all orbitals. 
- */ - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi) final; - - /** - * \brief Calculate ion derivatives of SPO's, their gradients, and their laplacians. - * - * @param P Electron particle set. - * @param first index of first electron. - * @@param last index of last electron - * @param source Ion particle set. - * @param iat_src Index of ion. - * @param grad_phi Container storing ion gradients for all particles and all orbitals. - * @param grad_grad_phi Container storing ion gradients of electron gradients for all particles and all orbitals. - * @param grad_lapl_phi Container storing ion gradients of SPO laplacians for all particles and all orbitals. - */ - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) final; - - void evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& grad_phi) final; - - void createResource(ResourceCollection& collection) const final; - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const final; - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const final; - -protected: - ///number of Single-particle orbitals - const IndexType BasisSetSize; - /// a copy of the original C before orbital rotation is applied; - std::shared_ptr C_copy; - - ///true if C is an identity matrix - bool Identity; - ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L - vgl_type Temp; - ///Tempv(OrbitalSetSize) Tempv=C*Temp - vgl_type Tempv; - - ///These are temporary VectorSoAContainers to hold value, gradient, and hessian for - ///all basis or SPO functions evaluated at a given point. 
- ///Nbasis x [1(value)+3(gradient)+6(hessian)] - vgh_type Temph; - ///Norbitals x [1(value)+3(gradient)+6(hessian)] - vgh_type Temphv; - - ///These are temporary VectorSoAContainers to hold value, gradient, hessian, and - /// gradient hessian for all basis or SPO functions evaluated at a given point. - ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempgh; - ///Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] - vghgh_type Tempghv; - -private: - ///helper functions to handle Identity - void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const; - - void evaluate_vgl_impl(const vgl_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) const; - ///These two functions unpack the data in vgh_type temp object into wavefunction friendly data structures. - - - ///This unpacks temp into vectors psi, dpsi, and d2psi. - void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const; - - ///Unpacks temp into the ith row (or electron index) of logdet, dlogdet, dhlogdet. - void evaluate_vgh_impl(const vgh_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& dhlogdet) const; - ///Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value, gradient, hessian - ///and gradient hessian. - void evaluate_vghgh_impl(const vghgh_type& temp, - ValueVector& psi, - GradVector& dpsi, - HessVector& d2psi, - GGGVector& dghpsi) const; - - void evaluate_vghgh_impl(const vghgh_type& temp, - int i, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& dhlogdet, - GGGMatrix& dghlogdet) const; - - - ///Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet. 
- void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const; - - ///Unpacks data in vgl object and calculates/places ionic gradient of value, - /// electron gradient, and electron laplacian result into dlogdet, dglogdet, and dllogdet respectively. - void evaluate_ionderiv_vgl_impl(const vghgh_type& temp, - int i, - GradMatrix& dlogdet, - HessMatrix& dglogdet, - GradMatrix& dllogdet) const; - - ///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet. - void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const; - - void mw_evaluateVGLImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVGLArray& phi_vgl_v) const; - - /// packed walker GEMM implementation - void mw_evaluateValueImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - OffloadMWVArray& phi_v) const; - /// packed walker GEMM implementation with multi virtual particle sets - void mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - OffloadMWVArray& phi_v) const; +using LCAOrbitalSet = LCAOrbitalSetT; - /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet - RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader& spo_list) const; - struct LCAOMultiWalkerMem; - ResourceHandle mw_mem_handle_; - /// timer for basis set - NewTimer& basis_timer_; - /// timer for MO - NewTimer& mo_timer_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp new file mode 100644 index 0000000000..99c91ead30 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -0,0 +1,1022 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source 
License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#include "LCAOrbitalSetT.h" + +#include "CPU/BLAS.hpp" +#include "Numerics/MatrixOperators.h" +#include + +namespace qmcplusplus +{ + +template +struct LCAOrbitalSetT::LCAOMultiWalkerMem : public Resource +{ + LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {} + LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} + + std::unique_ptr makeClone() const override { return std::make_unique(*this); } + + OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO] + OffloadMWVGLArray basis_vgl_mw; // [5][NW][NumAO] + OffloadMWVArray phi_v; // [NW][NumMO] + OffloadMWVArray basis_v_mw; // [NW][NumAO] + OffloadMWVArray vp_phi_v; // [NVPs][NumMO] + OffloadMWVArray vp_basis_v_mw; // [NVPs][NumAO] +}; + +template +LCAOrbitalSetT::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs) + : SPOSetT(my_name), + BasisSetSize(bs ? 
bs->getBasisSetSize() : 0), + Identity(true), + basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)), + mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine)) +{ + if (!bs) + throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its basis set!"); + myBasisSet = std::move(bs); + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + this->OrbitalSetSize = BasisSetSize; + LCAOrbitalSetT::checkObject(); +} + +template +LCAOrbitalSetT::LCAOrbitalSetT(const LCAOrbitalSetT& in) + : SPOSetT(in), + myBasisSet(in.myBasisSet->makeClone()), + C(in.C), + BasisSetSize(in.BasisSetSize), + C_copy(in.C_copy), + Identity(in.Identity), + basis_timer_(in.basis_timer_), + mo_timer_(in.mo_timer_) +{ + Temp.resize(BasisSetSize); + Temph.resize(BasisSetSize); + Tempgh.resize(BasisSetSize); + if (!in.Identity) + { + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + } + LCAOrbitalSetT::checkObject(); +} + +template +void LCAOrbitalSetT::setOrbitalSetSize(int norbs) +{ + if (C) + throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot " + "reset existing MO coefficients"); + + Identity = false; + this->OrbitalSetSize = norbs; + C = std::make_shared(this->OrbitalSetSize, BasisSetSize); + Tempv.resize(this->OrbitalSetSize); + Temphv.resize(this->OrbitalSetSize); + Tempghv.resize(this->OrbitalSetSize); + LCAOrbitalSetT::checkObject(); +} + +template +void LCAOrbitalSetT::checkObject() const +{ + if (Identity) + { + if (this->OrbitalSetSize != BasisSetSize) + throw std::runtime_error("LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize " + "must be equal if Identity = true!"); + if (C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should be " + "nullptr if Identity = true!"); + } + else + { + if (!C) + throw std::runtime_error("LCAOrbitalSetT::checkObject C should not " + "be nullptr if Identity = false!"); + if 
(this->OrbitalSetSize != C->rows()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C rows " + "doesn't match OrbitalSetSize."); + if (BasisSetSize != C->cols()) + throw std::runtime_error("LCAOrbitalSetT::checkObject C columns " + "doesn't match BasisSetSize."); + } +} + +template +void LCAOrbitalSetT::createResource(ResourceCollection& collection) const +{ + myBasisSet->createResource(collection); + auto resource_index = collection.addResource(std::make_unique()); +} + +template +void LCAOrbitalSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); + spo_leader.mw_mem_handle_ = collection.lendResource(); +} + +template +void LCAOrbitalSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); + collection.takebackResource(spo_leader.mw_mem_handle_); +} + +template +RefVectorWithLeader::basis_type> LCAOrbitalSetT::extractBasisRefList( + const RefVectorWithLeader>& spo_list) const +{ + RefVectorWithLeader basis_list(*spo_list.template getCastedLeader>().myBasisSet); + basis_list.reserve(spo_list.size()); + for (size_t iw = 0; iw < spo_list.size(); iw++) + basis_list.push_back(*spo_list.template getCastedElement>(iw).myBasisSet); + return basis_list; +} + +template +std::unique_ptr> LCAOrbitalSetT::makeClone() const +{ + return std::make_unique>(*this); +} + +template +void LCAOrbitalSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +{ + if (Identity) + { // PAY ATTENTION TO COMPLEX + myBasisSet->evaluateV(P, iat, psi.data()); + } + else + { + Vector vTemp(Temp.data(0), BasisSetSize); 
+ this->myBasisSet->evaluateV(P, iat, vTemp.data()); + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + MatrixOperators::product(C_partial_view, vTemp, psi); + } +} + +/** Find a better place for other user classes, Matrix should be padded as well + */ +template +static void Product_ABt(const VectorSoaContainer& A, const Matrix& B, VectorSoaContainer& C) +{ + constexpr char transa = 't'; + constexpr char transb = 'n'; + constexpr T zone(1); + constexpr T zero(0); + BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(), + C.capacity()); +} + +template +void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2psi.data()); +} + +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + 
d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi, + GGGMatrix& dghpsi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + + dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[i][j][0](2, 2) = gh_xzz[j]; // 
x|zz + + dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy + dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz + dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const +{ + const size_t output_size = psi.size(); + std::copy_n(temp.data(0), output_size, psi.data()); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xyz = temp.data(14); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < 
output_size; j++) + { + dpsi[j][0] = gx[j]; + dpsi[j][1] = gy[j]; + dpsi[j][2] = gz[j]; + + d2psi[j](0, 0) = hxx[j]; + d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; + d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j]; + d2psi[j](1, 1) = hyy[j]; + d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j]; + d2psi[j](2, 2) = hzz[j]; + + dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx + dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy + dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz + dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy + dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy + dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz + dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz + dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz + dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz + + dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy + dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy + dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz + dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy + dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy + dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz + dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz + dghpsi[j][1](2, 1) = gh_yyz[j]; // y|zy = yyz + dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz + + dghpsi[j][2](0, 0) = gh_xxz[j]; // z|xx = xxz + dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz + dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz + dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz + dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz + dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz + dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz + dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz + dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const +{ + const size_t output_size = dpsi.size(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is 
the negative of the + // electron gradient. Hence minus signs for each of these. + dpsi[j][0] = -gx[j]; + dpsi[j][1] = -gy[j]; + dpsi[j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + // TAKE CARE OF IDENTITY + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateVGL(P, iat, Temp); + } + + if (Identity) + evaluate_vgl_impl(Temp, psi, dpsi, d2psi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temp, C_partial_view, Tempv); + } + evaluate_vgl_impl(Tempv, psi, dpsi, d2psi); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; + + phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize); + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + + const size_t nw = phi_vgl_v.size(1); + + // TODO: make this cleaner? 
+ for (int iw = 0; iw < nw; iw++) + { + const size_t output_size = psi_v_list[iw].get().size(); + std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); + std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); + // grads are [dim, walker, orb] in phi_vgl_v + // [walker][orb, dim] in dpsi_v_list + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], + QMCTraits::DIM); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& basis_vgl_mw = spo_leader.mw_mem_handle_.getResource().basis_vgl_mw; + basis_vgl_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize); + + { + ScopedTimer local(basis_timer_); + auto basis_list = spo_leader.extractBasisRefList(spo_list); + myBasisSet->mw_evaluateVGL(basis_list, P_list, iat, basis_vgl_mw); + basis_vgl_mw.updateFrom(); // TODO: remove this when gemm is implemented + } + + if (Identity) + { + // output_size can be smaller than BasisSetSize + const size_t output_size = phi_vgl_v.size(2); + const size_t nw = phi_vgl_v.size(1); + + for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++) + for (int iw = 0; iw < nw; iw++) + std::copy_n(basis_vgl_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); + } + else + { + const size_t requested_orb_size = phi_vgl_v.size(2); + assert(requested_orb_size <= this->OrbitalSetSize); + { + ScopedTimer local(mo_timer_); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + // TODO: make class for general blas interface in Platforms + // have instance of that class as member of LCAOrbitalSetT, call + // gemm through that + BLAS::gemm('T', 'N', + 
requested_orb_size, // MOs + spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, basis_vgl_mw.data(), BasisSetSize, 0, phi_vgl_v.data(), + requested_orb_size); + } + } +} + +template +void LCAOrbitalSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v; + phi_v.resize(spo_list.size(), this->OrbitalSetSize); + mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v); + + const size_t output_size = phi_v.size(1); + const size_t nw = phi_v.size(0); + + for (int iw = 0; iw < nw; iw++) + std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); +} + +template +void LCAOrbitalSetT::mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVArray& phi_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + const size_t nw = spo_list.size(); + auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; + basis_v_mw.resize(nw, BasisSetSize); + + auto basis_list = spo_leader.extractBasisRefList(spo_list); + myBasisSet->mw_evaluateValue(basis_list, P_list, iat, basis_v_mw); + basis_v_mw.updateFrom(); // TODO: remove this when gemm is implemented + + if (Identity) + { + std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0)); + } + else + { + const size_t requested_orb_size = phi_v.size(1); + assert(requested_orb_size <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + spo_list.size(), // walkers + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, 
basis_v_mw.data(), BasisSetSize, 0, phi_v.data(), + requested_orb_size); + } +} + +template +void LCAOrbitalSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + const size_t nw = spo_list.size(); + for (size_t iw = 0; iw < nw; iw++) + { + for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) + { + spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); + ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size()); + } + } +} + +template +void LCAOrbitalSetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) +{ + Vector vTemp(Temp.data(0), BasisSetSize); + Vector invTemp(Temp.data(1), BasisSetSize); + + { + ScopedTimer local(mo_timer_); + // when only a subset of orbitals is used, extract limited rows of C. 
+ Matrix C_occupied(C->data(), psiinv.size(), BasisSetSize); + MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); + } + + for (size_t j = 0; j < VP.getTotalNum(); j++) + { + { + ScopedTimer local(basis_timer_); + myBasisSet->evaluateV(VP, j, vTemp.data()); + } + ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); + } +} + +template +void LCAOrbitalSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const +{ + assert(this == &spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + + mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v); + // Device data of phi_vgl_v must be up-to-date upon return + phi_vgl_v.updateTo(); + + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + for (int iw = 0; iw < nw; iw++) + { + ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); + GradType dphi; + for (size_t idim = 0; idim < QMCTraits::DIM; idim++) + dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw]; + grads[iw] = dphi; + } +} + +template +void LCAOrbitalSetT::evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& dhpsi) +{ + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGH(P, iat, Temph); + if (Identity) + evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Temph, C_partial_view, Temphv); + evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi); + } +} + +template +void LCAOrbitalSetT::evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& dhpsi, + GGGVector& 
dghpsi) +{ + // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not + // implemented\n"); + + // TAKE CARE OF IDENTITY + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + if (Identity) + evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); + else + { + assert(psi.size() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); + Product_ABt(Tempgh, C_partial_view, Tempghv); + evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi); + } +} + +/* implement using gemm algorithm */ +template +inline void LCAOrbitalSetT::evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const +{ + const size_t output_size = logdet.cols(); + std::copy_n(temp.data(0), output_size, logdet[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + for (size_t j = 0; j < output_size; j++) + { + dlogdet[i][j][0] = gx[j]; + dlogdet[i][j][1] = gy[j]; + dlogdet[i][j][2] = gz[j]; + } + std::copy_n(temp.data(4), output_size, d2logdet[i]); +} +template +void LCAOrbitalSetT::evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& psi, + GradMatrix& dpsi, + HessMatrix& d2psi) const +{ + const size_t output_size = psi.cols(); + std::copy_n(temp.data(0), output_size, psi[i]); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + + for (size_t j = 0; j < output_size; j++) + { + dpsi[i][j][0] = gx[j]; + dpsi[i][j][1] = gy[j]; + dpsi[i][j][2] = gz[j]; + + d2psi[i][j](0, 0) = hxx[j]; + d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; + d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j]; + d2psi[i][j](1, 1) = hyy[j]; + d2psi[i][j](2, 
1) = d2psi[i][j](1, 2) = hyz[j]; + d2psi[i][j](2, 2) = hzz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // electron gradient. Hence minus signs for each of these. + dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + } +} + +template +void LCAOrbitalSetT::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dpsi, + HessMatrix& dgpsi, + GradMatrix& dlpsi) const +{ + const size_t output_size = dpsi.cols(); + const T* restrict gx = temp.data(1); + const T* restrict gy = temp.data(2); + const T* restrict gz = temp.data(3); + const T* restrict hxx = temp.data(4); + const T* restrict hxy = temp.data(5); + const T* restrict hxz = temp.data(6); + const T* restrict hyy = temp.data(7); + const T* restrict hyz = temp.data(8); + const T* restrict hzz = temp.data(9); + const T* restrict gh_xxx = temp.data(10); + const T* restrict gh_xxy = temp.data(11); + const T* restrict gh_xxz = temp.data(12); + const T* restrict gh_xyy = temp.data(13); + const T* restrict gh_xzz = temp.data(15); + const T* restrict gh_yyy = temp.data(16); + const T* restrict gh_yyz = temp.data(17); + const T* restrict gh_yzz = temp.data(18); + const T* restrict gh_zzz = temp.data(19); + + for (size_t j = 0; j < output_size; j++) + { + // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property + // that + // for an atomic center, the ion gradient is the negative of the + // electron gradient. Hence minus signs for each of these. 
+ dpsi[i][j][0] = -gx[j]; + dpsi[i][j][1] = -gy[j]; + dpsi[i][j][2] = -gz[j]; + + dgpsi[i][j](0, 0) = -hxx[j]; + dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j]; + dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j]; + dgpsi[i][j](1, 1) = -hyy[j]; + dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j]; + dgpsi[i][j](2, 2) = -hzz[j]; + + // Since this returns the ion gradient of the laplacian, we have to + // trace the grad hessian vector. + dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]); + dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]); + dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]); + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGL(P, iat, Temp); + Product_ABt(Temp, C_partial_view, Tempv); + evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGH(P, iat, Temph); + Product_ABt(Temph, C_partial_view, Temphv); + 
evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, Tempgh); + evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + } + else + { + assert(logdet.cols() <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateVGHGH(P, iat, this->Tempgh); + Product_ABt(this->Tempgh, C_partial_view, this->Tempghv); + evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + evaluate_ionderiv_v_impl(Temp, i, gradphi); + } + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp); + Product_ABt(this->Temp, *C, this->Tempv); + evaluate_ionderiv_v_impl(this->Tempv, i, gradphi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) +{ + if (Identity) + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + 
evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } + else + { + for (size_t i = 0, iat = first; iat < last; i++, iat++) + { + myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh); + Product_ABt(this->Tempgh, *C, this->Tempghv); + evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi); + } + } +} + +template +void LCAOrbitalSetT::evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& gradphi) +{ + if (Identity) + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + evaluate_ionderiv_v_row_impl(this->Temp, gradphi); + } + else + { + myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp); + Product_ABt(Temp, *C, this->Tempv); + evaluate_ionderiv_v_row_impl(this->Tempv, gradphi); + } +} + +template +void LCAOrbitalSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +{ + if (!use_stored_copy) + *C_copy = *C; + // gemm is out-of-place + BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(), + BasisSetSize, rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize); + + /* debugging code + app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName << + std::endl; for (int j = 0; j < this->OrbitalSetSize; j++) for (int i = 0; i < + BasisSetSize; i++) + { + app_log() << " " << std::right << std::fixed << std::setprecision(16) << + std::setw(23) << std::scientific + << *(C->data() + j * BasisSetSize + i); + + if ((j * BasisSetSize + i + 1) % 4 == 0) + app_log() << std::endl; + } + */ +} + +// Class concrete types from ValueType +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class LCAOrbitalSetT; +#else +template class LCAOrbitalSetT; +#endif +#else +#ifndef MIXED_PRECISION +template class LCAOrbitalSetT>; +#else +template class LCAOrbitalSetT>; +#endif +#endif + +} // namespace 
qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h new file mode 100644 index 0000000000..24e979595c --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -0,0 +1,372 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H +#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSETT_H + +#include "Numerics/DeterminantOperators.h" +#include "Numerics/MatrixOperators.h" +#include "QMCWaveFunctions/BasisSetBase.h" +#include "QMCWaveFunctions/SPOSetT.h" + +#include + +namespace qmcplusplus +{ +/** class to handle linear combinations of basis orbitals used to evaluate the + * Dirac determinants. 
+ * + * SoA version of LCOrbitalSet + * Localized basis set is always real + */ +template +class LCAOrbitalSetT : public SPOSetT +{ +public: + using basis_type = SoaBasisSetBase; + using vgl_type = typename basis_type::vgl_type; + using vgh_type = typename basis_type::vgh_type; + using vghgh_type = typename basis_type::vghgh_type; + + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using PosType = typename SPOSetT::PosType; + using HessVector = typename SPOSetT::HessVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GradType = typename SPOSetT::GradType; + using OffloadMWVGLArray = typename basis_type::OffloadMWVGLArray; + using OffloadMWVArray = typename basis_type::OffloadMWVArray; + + /// pointer to the basis set + std::unique_ptr myBasisSet; + /// pointer to matrix containing the coefficients + std::shared_ptr C; + + /** constructor + * @param bs pointer to the BasisSet + */ + LCAOrbitalSetT(const std::string& my_name, std::unique_ptr&& bs); + + LCAOrbitalSetT(const LCAOrbitalSetT& in); + + std::string getClassName() const final { return "LCAOrbitalSetT"; } + + bool isRotationSupported() const final { return true; } + + bool hasIonDerivs() const final { return true; } + + std::unique_ptr> makeClone() const final; + + void storeParamsBeforeRotation() final { C_copy = std::make_shared(*C); } + + void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final; + + /** set the OrbitalSetSize and Identity=false and initialize internal + * storages + */ + void setOrbitalSetSize(int norbs) final; + + /** return the size of the basis set + */ 
+ int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); } + + bool isIdentity() const { return Identity; }; + + /** check consistency between Identity and C + * + */ + void checkObject() const final; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const final; + + void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const final; + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const final; + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) final; + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const final; + + void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) final; + + void evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& 
dlogdet, + HessMatrix& grad_grad_logdet) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) final; + + // NOTE: The data types get complicated here, so here's an overview of the + // data types associated with ionic derivatives, and how to get their + // data. + // + // NOTE: These data structures hold the data for one particular ion, and so + // the ID is implicit. + // It's up to the user to keep track of which ion these derivatives + // refer to. + // + // 1.) GradMatrix grad_phi: Holds the ionic derivatives of each SPO for + // each electron. + // Example: grad_phi[iel][iorb][idim]. iel -- electron index. + // iorb -- orbital index. + // idim -- cartesian index + // of ionic derivative. + // X=0, Y=1, Z=2. + // + // 2.) HessMatrix grad_grad_phi: Holds the ionic derivatives of the + // electron gradient components + // for each SPO and each electron. + // Example: grad_grad_phi[iel][iorb](idim,edim) iel -- + // electron index. + // iorb -- + // orbital index. + // idim -- ionic + // derivative's + // cartesian + // index. + // X=0, Y=1, + // Z=2 + // edim -- + // electron + // derivative's + // cartesian + // index. + // x=0, y=1, + // z=2. + // + // 3.) GradMatrix grad_lapl_phi: Holds the ionic derivatives of the + // electron laplacian for each SPO and each electron. + // Example: grad_lapl_phi[iel][iorb][idim]. iel -- electron + // index. + // iorb -- orbital + // index. idim -- + // cartesian index of + // ionic derivative. + // X=0, Y=1, Z=2. + + /** + * \brief Calculate ion derivatives of SPO's. + * + * @param P Electron particle set. + * @param first index of first electron + * @param last index of last electron + * @param source Ion particle set. + * @param iat_src Index of ion. + * @param grad_phi Container storing ion gradients for all particles and all + * orbitals. 
+ */ + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi) final; + + /** + * \brief Calculate ion derivatives of SPO's, their gradients, and their + * laplacians. + * + * @param P Electron particle set. + * @param first index of first electron. + * @param last index of last electron + * @param source Ion particle set. + * @param iat_src Index of ion. + * @param grad_phi Container storing ion gradients for all particles and + * all orbitals. + * @param grad_grad_phi Container storing ion gradients of electron + * gradients for all particles and all orbitals. + * @param grad_lapl_phi Container storing ion gradients of SPO laplacians + * for all particles and all orbitals. + */ + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) final; + + void evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& grad_phi) final; + + void createResource(ResourceCollection& collection) const final; + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const final; + +protected: + /// number of basis functions (atomic orbitals) + const IndexType BasisSetSize; + /// a copy of the original C before orbital rotation is applied; + std::shared_ptr C_copy; + + /// true if C is an identity matrix + bool Identity; + /// Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L + vgl_type Temp; + /// Tempv(OrbitalSetSize) Tempv=C*Temp + vgl_type Tempv; + + /// These are temporary VectorSoAContainers to hold value, gradient, and + /// hessian for all basis or SPO functions evaluated at a given point. 
+ /// Nbasis x [1(value)+3(gradient)+6(hessian)] + vgh_type Temph; + /// Norbitals x [1(value)+3(gradient)+6(hessian)] + vgh_type Temphv; + + /// These are temporary VectorSoAContainers to hold value, gradient, + /// hessian, and + /// gradient hessian for all basis or SPO functions evaluated at a given + /// point. + /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempgh; + /// Norbitals x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] + vghgh_type Tempghv; + +private: + /// helper functions to handle Identity + void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const; + + void evaluate_vgl_impl(const vgl_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) const; + /// These two functions unpack the data in vgh_type temp object into + /// wavefunction friendly data structures. + + /// This unpacks temp into vectors psi, dpsi, and d2psi. + void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const; + + /// Unpacks temp into the ith row (or electron index) of logdet, dlogdet, + /// dhlogdet. + void evaluate_vgh_impl(const vgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet) const; + /// Unpacks data in vghgh_type temp object into wavefunction friendly data + /// structures for value, gradient, hessian and gradient hessian. + void evaluate_vghgh_impl(const vghgh_type& temp, + ValueVector& psi, + GradVector& dpsi, + HessVector& d2psi, + GGGVector& dghpsi) const; + + void evaluate_vghgh_impl(const vghgh_type& temp, + int i, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& dhlogdet, + GGGMatrix& dghlogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient result + /// into dlogdet. 
+ void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient of + /// value, + /// electron gradient, and electron laplacian result into dlogdet, + /// dglogdet, and dllogdet respectively. + void evaluate_ionderiv_vgl_impl(const vghgh_type& temp, + int i, + GradMatrix& dlogdet, + HessMatrix& dglogdet, + GradMatrix& dllogdet) const; + + /// Unpacks data in vgl object and calculates/places ionic gradient of a + /// single row (phi_j(r)) into dlogdet. + void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const; + + void mw_evaluateVGLImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVGLArray& phi_vgl_v) const; + + /// packed walker GEMM implementation + void mw_evaluateValueImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + OffloadMWVArray& phi_v) const; + + /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet + RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader>& spo_list) const; + + struct LCAOMultiWalkerMem; + ResourceHandle mw_mem_handle_; + /// timer for basis set + NewTimer& basis_timer_; + /// timer for MO + NewTimer& mo_timer_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp deleted file mode 100644 index 674a9a6c1f..0000000000 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp +++ /dev/null @@ -1,65 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. 
-// -// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "LCAOrbitalSetWithCorrection.h" - -namespace qmcplusplus -{ -LCAOrbitalSetWithCorrection::LCAOrbitalSetWithCorrection(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs) - : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) -{} - -void LCAOrbitalSetWithCorrection::setOrbitalSetSize(int norbs) -{ - assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); - OrbitalSetSize = norbs; - cusp.setOrbitalSetSize(norbs); -} - - -std::unique_ptr LCAOrbitalSetWithCorrection::makeClone() const -{ - return std::make_unique(*this); -} - -void LCAOrbitalSetWithCorrection::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - lcao.evaluateValue(P, iat, psi); - cusp.addV(P, iat, psi); -} - -void LCAOrbitalSetWithCorrection::evaluateVGL(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) -{ - lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); - cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); -} - -void LCAOrbitalSetWithCorrection::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); - for (size_t i = 0, iat = first; iat < last; i++, iat++) - cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h index 65185973d2..6b25e719e2 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h @@ -13,57 +13,12 @@ 
#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H #define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTION_TEMP_H -#include "QMCWaveFunctions/SPOSet.h" -#include "QMCWaveFunctions/BasisSetBase.h" -#include "LCAOrbitalSet.h" -#include "SoaCuspCorrection.h" - +#include "Configuration.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h" namespace qmcplusplus { -/** class to add cusp correction to LCAOrbitalSet. - * - */ -class LCAOrbitalSetWithCorrection : public SPOSet -{ -public: - using basis_type = LCAOrbitalSet::basis_type; - /** constructor - * @param ions - * @param els - * @param bs pointer to the BasisSet - * @param rl report level - */ - LCAOrbitalSetWithCorrection(const std::string& my_name, - ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs); - - LCAOrbitalSetWithCorrection(const LCAOrbitalSetWithCorrection& in) = default; - - std::string getClassName() const final { return "LCAOrbitalSetWithCorrection"; } - - std::unique_ptr makeClone() const final; - - void setOrbitalSetSize(int norbs) final; - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) final; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) final; - - friend class LCAOrbitalBuilder; - -private: - LCAOrbitalSet lcao; +using LCAOrbitalSetWithCorrection = LCAOrbitalSetWithCorrectionT; - SoaCuspCorrection cusp; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp new file mode 100644 index 0000000000..bc3379c4f1 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp @@ -0,0 +1,74 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#include "LCAOrbitalSetWithCorrectionT.h" + +namespace qmcplusplus +{ +template +LCAOrbitalSetWithCorrectionT::LCAOrbitalSetWithCorrectionT(const std::string& my_name, + ParticleSetT& ions, + ParticleSetT& els, + std::unique_ptr&& bs) + : SPOSetT(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) +{} + +template +void LCAOrbitalSetWithCorrectionT::setOrbitalSetSize(int norbs) +{ + assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); + this->OrbitalSetSize = norbs; + cusp.setOrbitalSetSize(norbs); +} + +template +std::unique_ptr> LCAOrbitalSetWithCorrectionT::makeClone() const +{ + return std::make_unique>(*this); +} + +template +void LCAOrbitalSetWithCorrectionT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +{ + lcao.evaluateValue(P, iat, psi); + cusp.addV(P, iat, psi); +} + +template +void LCAOrbitalSetWithCorrectionT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + lcao.evaluateVGL(P, iat, psi, dpsi, d2psi); + cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi); +} + +template +void LCAOrbitalSetWithCorrectionT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + for (size_t i = 0, iat = first; iat < last; i++, iat++) + cusp.add_vgl(P, iat, i, 
logdet, dlogdet, d2logdet); +} + +#ifndef MIXED_PRECISION +template class LCAOrbitalSetWithCorrectionT; +#else +template class LCAOrbitalSetWithCorrectionT; +#endif +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h new file mode 100644 index 0000000000..d086c7edb8 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h @@ -0,0 +1,74 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H +#define QMCPLUSPLUS_SOA_LINEARCOMIBINATIONORBITALSET_WITH_CORRECTIONT_H + +#include "LCAOrbitalSetT.h" +#include "QMCWaveFunctions/BasisSetBase.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "SoaCuspCorrectionT.h" + +namespace qmcplusplus +{ +/** class to add cusp correction to LCAOrbitalSet. 
+ * + */ + +template +class LCAOrbitalSetWithCorrectionT : public SPOSetT +{ +public: + using basis_type = typename LCAOrbitalSetT::basis_type; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + /** constructor + * @param ions + * @param els + * @param bs pointer to the BasisSet + * @param rl report level + */ + LCAOrbitalSetWithCorrectionT(const std::string& my_name, + ParticleSetT& ions, + ParticleSetT& els, + std::unique_ptr&& bs); + + LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default; + + std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; } + + std::unique_ptr> makeClone() const final; + + void setOrbitalSetSize(int norbs) final; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; + + template + friend class LCAOrbitalBuilderT; + +private: + LCAOrbitalSetT lcao; + + SoaCuspCorrectionT cusp; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h index 6a4d346551..97e57db7ae 100644 --- a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h +++ b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h @@ -19,6 +19,9 @@ #include "Message/MPIObjectBase.h" #include "ModernStringUtils.hpp" #include "hdf/hdf_archive.h" +#include "LCAO/MultiQuinticSpline1D.h" +#include "LCAO/SoaAtomicBasisSet.h" +#include "LCAO/SoaAtomicBasisSetT.h" namespace qmcplusplus { @@ -240,5 +243,60 @@ class RadialOrbitalSetBuilder, SH>> : m_orbitals.setRmax(0); //set Rmax } }; + +template +class 
RadialOrbitalSetBuilder, SH, ORBT>> : public MPIObjectBase +{ +public: + using COT = SoaAtomicBasisSetT, SH, ORBT>; + using RadialOrbital_t = MultiFunctorAdapter; + using single_type = typename RadialOrbital_t::single_type; + + ///true, if the RadialOrbitalType is normalized + bool Normalized; + ///orbitals to build + COT& m_orbitals; + + ///constructor + RadialOrbitalSetBuilder(Communicate* comm, COT& aos) : MPIObjectBase(comm), Normalized(true), m_orbitals(aos) {} + + ///implement functions used by AOBasisBuilder + bool addGrid(xmlNodePtr cur, const std::string& rad_type) { return true; } + bool addGridH5(hdf_archive& hin) { return true; } + bool openNumericalBasisH5(xmlNodePtr cur) { return true; } + bool put(xmlNodePtr cur) + { + const std::string a(lowerCase(getXMLAttributeValue(cur, "normalized"))); + if (a == "no") + Normalized = false; + return true; + } + + bool addRadialOrbital(xmlNodePtr cur, const std::string& rad_type, const QuantumNumberType& nlms) + { + auto radorb = std::make_unique(nlms[q_l], Normalized); + radorb->putBasisGroup(cur); + + m_orbitals.RnlID.push_back(nlms); + m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb)); + return true; + } + + bool addRadialOrbitalH5(hdf_archive& hin, const std::string& rad_type, const QuantumNumberType& nlms) + { + auto radorb = std::make_unique(nlms[q_l], Normalized); + radorb->putBasisGroupH5(hin, *myComm); + + m_orbitals.RnlID.push_back(nlms); + m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb)); + + return true; + } + + void finalize() + { + m_orbitals.setRmax(0); //set Rmax + } +}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h index 46ed806508..643c4e78b8 100644 --- a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h +++ b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h @@ -166,7 +166,7 @@ bool RadialOrbitalSetBuilder::addGrid(xmlNodePtr cur, const std::string& ra hin.pop(); } else - 
input_grid = OneDimGridFactory::createGrid(cur); + input_grid = OneDimGridFactory::createGrid(cur); //set zero to use std::max m_rcut_safe = 0; diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSet.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSet.h index a23b411763..303086f2bf 100644 --- a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSet.h +++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSet.h @@ -14,9 +14,8 @@ #ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H #define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H -#include "CPU/math.hpp" -#include "OptimizableObject.h" -#include +#include "Configuration.h" +#include "SoaAtomicBasisSetT.h" namespace qmcplusplus { @@ -28,1097 +27,7 @@ namespace qmcplusplus * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$ */ template -class SoaAtomicBasisSet -{ -public: - using RadialOrbital_t = ROT; - using RealType = typename ROT::RealType; - using GridType = typename ROT::GridType; - using ValueType = typename QMCTraits::ValueType; - using OffloadArray4D = Array>; - using OffloadArray3D = Array>; - using OffloadArray2D = Array>; - using OffloadVector = Vector>; - using OffloadIntVector = Vector>; - - ///the constructor - explicit SoaAtomicBasisSet(int lmax, bool addsignforM = false) - : Ylm(lmax, addsignforM), - periodic_image_phase_factors_ptr_(std::make_shared()), - periodic_image_displacements_ptr_(std::make_shared()), - periodic_image_phase_factors_(*periodic_image_phase_factors_ptr_), - periodic_image_displacements_(*periodic_image_displacements_ptr_), - NL_ptr_(std::make_shared()), - LM_ptr_(std::make_shared()), - NL(*NL_ptr_), - LM(*LM_ptr_), - ylm_timer_(createGlobalTimer("SoaAtomicBasisSet::Ylm", timer_level_fine)), - rnl_timer_(createGlobalTimer("SoaAtomicBasisSet::Rnl", timer_level_fine)), - pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::pbc_images", timer_level_fine)), - nelec_pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::nelec_pbc_images", timer_level_fine)), - 
phase_timer_(createGlobalTimer("SoaAtomicBasisSet::phase", timer_level_fine)), - psi_timer_(createGlobalTimer("SoaAtomicBasisSet::psi", timer_level_fine)) - {} - - void checkInVariables(opt_variables_type& active) - { - //for(size_t nl=0; nlcheckInVariables(active); - } - - void checkOutVariables(const opt_variables_type& active) - { - //for(size_t nl=0; nlcheckOutVariables(active); - } - - void resetParameters(const opt_variables_type& active) - { - //for(size_t nl=0; nlresetParameters(active); - } - - /** return the number of basis functions - */ - inline int getBasisSetSize() const - { - //=NL.size(); - return BasisSetSize; - } - - /** Set the number of periodic image for the evaluation of the orbitals and the phase factor. - * In the case of Non-PBC, PBCImages=(1,1,1), SuperTwist(0,0,0) and the PhaseFactor=1. - */ - void setPBCParams(const TinyVector& pbc_images, - const TinyVector supertwist, - const OffloadVector& PeriodicImagePhaseFactors, - const OffloadArray2D& PeriodicImageDisplacements) - { - PBCImages = pbc_images; - periodic_image_phase_factors_ = PeriodicImagePhaseFactors; - periodic_image_displacements_ = PeriodicImageDisplacements; - SuperTwist = supertwist; - - periodic_image_phase_factors_.updateTo(); - periodic_image_displacements_.updateTo(); - } - - - /** implement a BasisSetBase virtual function - * - * Set Rmax and BasisSetSize - * @todo Should be able to overwrite Rmax to be much smaller than the maximum grid - */ - inline void finalize() - { - BasisSetSize = LM.size(); - NL.updateTo(); - LM.updateTo(); - tempS.resize(std::max(Ylm.size(), RnlID.size())); - } - - /** Set Rmax */ - template - inline void setRmax(T rmax) - { - Rmax = (rmax > 0) ? rmax : MultiRnl.rmax(); - } - - ///set the current offset - inline void setCenter(int c, int offset) {} - - /// Sets a boolean vector for S-type orbitals. Used for cusp correction. 
- void queryOrbitalsForSType(std::vector& s_orbitals) const - { - for (int i = 0; i < BasisSetSize; i++) - { - s_orbitals[i] = (RnlID[NL[i]][1] == 0); - } - } - - /** evaluate VGL - */ - template - inline void evaluateVGL(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGL& vgl, PosType Tv) - { - int TransX, TransY, TransZ; - - PosType dr_new; - T r_new; - // T psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new; - -#if not defined(QMC_COMPLEX) - const ValueType correctphase = 1; -#else - - RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2]; - RealType s, c; - qmcplusplus::sincos(-phasearg, &s, &c); - const ValueType correctphase(c, s); -#endif - - constexpr T cone(1); - constexpr T ctwo(2); - - - //one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - - //V,Gx,Gy,Gz,L - auto* restrict psi = vgl.data(0) + offset; - const T* restrict ylm_v = Ylm[0]; //value - auto* restrict dpsi_x = vgl.data(1) + offset; - const T* restrict ylm_x = Ylm[1]; //gradX - auto* restrict dpsi_y = vgl.data(2) + offset; - const T* restrict ylm_y = Ylm[2]; //gradY - auto* restrict dpsi_z = vgl.data(3) + offset; - const T* restrict ylm_z = Ylm[3]; //gradZ - auto* restrict d2psi = vgl.data(4) + offset; - const T* restrict ylm_l = Ylm[4]; //lap - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - psi[ib] = 0; - dpsi_x[ib] = 0; - dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - d2psi[ib] = 0; - } - //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax) - int iter = -1; - for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
- TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - - dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); - dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); - dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); - - r_new = std::sqrt(dot(dr_new, dr_new)); - - iter++; - if (r_new >= Rmax) - continue; - - //SIGN Change!! - const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; - Ylm.evaluateVGL(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi); - - const T rinv = cone / r_new; - - ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. 
- const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - const int nl(NL[ib]); - const int lm(LM[ib]); - const T drnloverr = rinv * dphi[nl]; - const T ang = ylm_v[lm]; - const T gr_x = drnloverr * x; - const T gr_y = drnloverr * y; - const T gr_z = drnloverr * z; - const T ang_x = ylm_x[lm]; - const T ang_y = ylm_y[lm]; - const T ang_z = ylm_z[lm]; - const T vr = phi[nl]; - - psi[ib] += ang * vr * Phase; - dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase; - dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase; - dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase; - d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) + - vr * ylm_l[lm]) * - Phase; - } - } - } - } - } - - template - inline void evaluateVGH(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGH& vgh) - { - int TransX, TransY, TransZ; - - PosType dr_new; - T r_new; - - constexpr T cone(1); - - //one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - - //V,Gx,Gy,Gz,L - auto* restrict psi = vgh.data(0) + offset; - const T* restrict ylm_v = Ylm[0]; //value - auto* restrict dpsi_x = vgh.data(1) + offset; - const T* restrict ylm_x = Ylm[1]; //gradX - auto* restrict dpsi_y = vgh.data(2) + offset; - const T* restrict ylm_y = Ylm[2]; //gradY - auto* restrict dpsi_z = vgh.data(3) + offset; - const T* restrict ylm_z = Ylm[3]; //gradZ - - auto* restrict dhpsi_xx = vgh.data(4) + offset; - const T* restrict ylm_xx = Ylm[4]; - auto* restrict dhpsi_xy = vgh.data(5) + offset; - const T* restrict ylm_xy = Ylm[5]; - auto* restrict dhpsi_xz = vgh.data(6) + offset; - const T* restrict ylm_xz = Ylm[6]; - auto* restrict dhpsi_yy = vgh.data(7) + offset; - const T* restrict ylm_yy = Ylm[7]; - auto* restrict dhpsi_yz = vgh.data(8) + offset; - const T* restrict ylm_yz = Ylm[8]; - auto* 
restrict dhpsi_zz = vgh.data(9) + offset; - const T* restrict ylm_zz = Ylm[9]; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - psi[ib] = 0; - dpsi_x[ib] = 0; - dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - dhpsi_xx[ib] = 0; - dhpsi_xy[ib] = 0; - dhpsi_xz[ib] = 0; - dhpsi_yy[ib] = 0; - dhpsi_yz[ib] = 0; - dhpsi_zz[ib] = 0; - // d2psi[ib] = 0; - } - - for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); - dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); - dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); - r_new = std::sqrt(dot(dr_new, dr_new)); - - //const size_t ib_max=NL.size(); - if (r_new >= Rmax) - continue; - - //SIGN Change!! 
- const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; - Ylm.evaluateVGH(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi); - - const T rinv = cone / r_new; - - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - const int nl(NL[ib]); - const int lm(LM[ib]); - const T drnloverr = rinv * dphi[nl]; - const T ang = ylm_v[lm]; - const T gr_x = drnloverr * x; - const T gr_y = drnloverr * y; - const T gr_z = drnloverr * z; - - //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is - // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r}) - // To save recomputation, I evaluate everything except the x_i*x_j term once, and store it in - // gr2_tmp. The full term is obtained by x_i*x_j*gr2_tmp. - const T gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr); - const T gr_xx = x * x * gr2_tmp + drnloverr; - const T gr_xy = x * y * gr2_tmp; - const T gr_xz = x * z * gr2_tmp; - const T gr_yy = y * y * gr2_tmp + drnloverr; - const T gr_yz = y * z * gr2_tmp; - const T gr_zz = z * z * gr2_tmp + drnloverr; - - const T ang_x = ylm_x[lm]; - const T ang_y = ylm_y[lm]; - const T ang_z = ylm_z[lm]; - const T ang_xx = ylm_xx[lm]; - const T ang_xy = ylm_xy[lm]; - const T ang_xz = ylm_xz[lm]; - const T ang_yy = ylm_yy[lm]; - const T ang_yz = ylm_yz[lm]; - const T ang_zz = ylm_zz[lm]; - - const T vr = phi[nl]; - - psi[ib] += ang * vr; - dpsi_x[ib] += ang * gr_x + vr * ang_x; - dpsi_y[ib] += ang * gr_y + vr * ang_y; - dpsi_z[ib] += ang * gr_z + vr * ang_z; - - - // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y - // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y) - dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; - dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x; - dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; - dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; - dhpsi_yz[ib] += gr_yz 
* ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; - dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; - } - } - } - } - } - - template - inline void evaluateVGHGH(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGHGH& vghgh) - { - int TransX, TransY, TransZ; - - PosType dr_new; - T r_new; - - constexpr T cone(1); - - //one can assert the alignment - RealType* restrict phi = tempS.data(0); - RealType* restrict dphi = tempS.data(1); - RealType* restrict d2phi = tempS.data(2); - RealType* restrict d3phi = tempS.data(3); - - //V,Gx,Gy,Gz,L - auto* restrict psi = vghgh.data(0) + offset; - const T* restrict ylm_v = Ylm[0]; //value - auto* restrict dpsi_x = vghgh.data(1) + offset; - const T* restrict ylm_x = Ylm[1]; //gradX - auto* restrict dpsi_y = vghgh.data(2) + offset; - const T* restrict ylm_y = Ylm[2]; //gradY - auto* restrict dpsi_z = vghgh.data(3) + offset; - const T* restrict ylm_z = Ylm[3]; //gradZ - - auto* restrict dhpsi_xx = vghgh.data(4) + offset; - const T* restrict ylm_xx = Ylm[4]; - auto* restrict dhpsi_xy = vghgh.data(5) + offset; - const T* restrict ylm_xy = Ylm[5]; - auto* restrict dhpsi_xz = vghgh.data(6) + offset; - const T* restrict ylm_xz = Ylm[6]; - auto* restrict dhpsi_yy = vghgh.data(7) + offset; - const T* restrict ylm_yy = Ylm[7]; - auto* restrict dhpsi_yz = vghgh.data(8) + offset; - const T* restrict ylm_yz = Ylm[8]; - auto* restrict dhpsi_zz = vghgh.data(9) + offset; - const T* restrict ylm_zz = Ylm[9]; - - auto* restrict dghpsi_xxx = vghgh.data(10) + offset; - const T* restrict ylm_xxx = Ylm[10]; - auto* restrict dghpsi_xxy = vghgh.data(11) + offset; - const T* restrict ylm_xxy = Ylm[11]; - auto* restrict dghpsi_xxz = vghgh.data(12) + offset; - const T* restrict ylm_xxz = Ylm[12]; - auto* restrict dghpsi_xyy = vghgh.data(13) + offset; - const T* restrict ylm_xyy = Ylm[13]; - auto* restrict dghpsi_xyz = vghgh.data(14) + offset; - const T* restrict ylm_xyz = Ylm[14]; - auto* restrict dghpsi_xzz = 
vghgh.data(15) + offset; - const T* restrict ylm_xzz = Ylm[15]; - auto* restrict dghpsi_yyy = vghgh.data(16) + offset; - const T* restrict ylm_yyy = Ylm[16]; - auto* restrict dghpsi_yyz = vghgh.data(17) + offset; - const T* restrict ylm_yyz = Ylm[17]; - auto* restrict dghpsi_yzz = vghgh.data(18) + offset; - const T* restrict ylm_yzz = Ylm[18]; - auto* restrict dghpsi_zzz = vghgh.data(19) + offset; - const T* restrict ylm_zzz = Ylm[19]; - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - psi[ib] = 0; - - dpsi_x[ib] = 0; - dpsi_y[ib] = 0; - dpsi_z[ib] = 0; - - dhpsi_xx[ib] = 0; - dhpsi_xy[ib] = 0; - dhpsi_xz[ib] = 0; - dhpsi_yy[ib] = 0; - dhpsi_yz[ib] = 0; - dhpsi_zz[ib] = 0; - - dghpsi_xxx[ib] = 0; - dghpsi_xxy[ib] = 0; - dghpsi_xxz[ib] = 0; - dghpsi_xyy[ib] = 0; - dghpsi_xyz[ib] = 0; - dghpsi_xzz[ib] = 0; - dghpsi_yyy[ib] = 0; - dghpsi_yyz[ib] = 0; - dghpsi_yzz[ib] = 0; - dghpsi_zzz[ib] = 0; - } - - for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); - dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); - dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); - r_new = std::sqrt(dot(dr_new, dr_new)); - - //const size_t ib_max=NL.size(); - if (r_new >= Rmax) - continue; - - //SIGN Change!! 
- const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; - Ylm.evaluateVGHGH(x, y, z); - - MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi); - - const T rinv = cone / r_new; - const T xu = x * rinv, yu = y * rinv, zu = z * rinv; - for (size_t ib = 0; ib < BasisSetSize; ++ib) - { - const int nl(NL[ib]); - const int lm(LM[ib]); - const T drnloverr = rinv * dphi[nl]; - const T ang = ylm_v[lm]; - const T gr_x = drnloverr * x; - const T gr_y = drnloverr * y; - const T gr_z = drnloverr * z; - - //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is - // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r}) - // To save recomputation, I evaluate everything except the x_i*x_j term once, and store it in - // gr2_tmp. The full term is obtained by x_i*x_j*gr2_tmp. This is p(r) in the notes. - const T gr2_tmp = rinv * (d2phi[nl] - drnloverr); - - const T gr_xx = x * xu * gr2_tmp + drnloverr; - const T gr_xy = x * yu * gr2_tmp; - const T gr_xz = x * zu * gr2_tmp; - const T gr_yy = y * yu * gr2_tmp + drnloverr; - const T gr_yz = y * zu * gr2_tmp; - const T gr_zz = z * zu * gr2_tmp + drnloverr; - - //This is q(r) in the notes. - const T gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp; - - const T gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu); - const T gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu; - const T gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu; - const T gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu; - const T gr_xyz = xu * yu * zu * gr3_tmp; - const T gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu; - const T gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu); - const T gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu; - const T gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu; - const T gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. 
* zu); - - - //Angular derivatives up to third - const T ang_x = ylm_x[lm]; - const T ang_y = ylm_y[lm]; - const T ang_z = ylm_z[lm]; - - const T ang_xx = ylm_xx[lm]; - const T ang_xy = ylm_xy[lm]; - const T ang_xz = ylm_xz[lm]; - const T ang_yy = ylm_yy[lm]; - const T ang_yz = ylm_yz[lm]; - const T ang_zz = ylm_zz[lm]; - - const T ang_xxx = ylm_xxx[lm]; - const T ang_xxy = ylm_xxy[lm]; - const T ang_xxz = ylm_xxz[lm]; - const T ang_xyy = ylm_xyy[lm]; - const T ang_xyz = ylm_xyz[lm]; - const T ang_xzz = ylm_xzz[lm]; - const T ang_yyy = ylm_yyy[lm]; - const T ang_yyz = ylm_yyz[lm]; - const T ang_yzz = ylm_yzz[lm]; - const T ang_zzz = ylm_zzz[lm]; - - const T vr = phi[nl]; - - psi[ib] += ang * vr; - dpsi_x[ib] += ang * gr_x + vr * ang_x; - dpsi_y[ib] += ang * gr_y + vr * ang_y; - dpsi_z[ib] += ang * gr_z + vr * ang_z; - - - // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y - // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y) - dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; - dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x; - dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; - dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; - dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; - dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; - - dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx; - dghpsi_xxy[ib] += - gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + 2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x; - dghpsi_xxz[ib] += - gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x; - dghpsi_xyy[ib] += - gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y; - dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x + - ang_yz * 
gr_x + gr_xz * ang_y + ang_xz * gr_y; - dghpsi_xzz[ib] += - gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z; - dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy; - dghpsi_yyz[ib] += - gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y; - dghpsi_yzz[ib] += - gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z; - dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz; - } - } - } - } - } - - /** evaluate V - */ - template - inline void evaluateV(const LAT& lattice, const T r, const PosType& dr, VT* restrict psi, PosType Tv) - { - int TransX, TransY, TransZ; - - PosType dr_new; - T r_new; - -#if not defined(QMC_COMPLEX) - const ValueType correctphase = 1.0; -#else - - RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2]; - RealType s, c; - qmcplusplus::sincos(-phasearg, &s, &c); - const ValueType correctphase(c, s); - -#endif - - RealType* restrict ylm_v = tempS.data(0); - RealType* restrict phi_r = tempS.data(1); - - for (size_t ib = 0; ib < BasisSetSize; ++ib) - psi[ib] = 0; - //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax) - int iter = -1; - for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); - for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... - TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); - for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z - { - //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
- TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); - - dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); - dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); - dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); - - r_new = std::sqrt(dot(dr_new, dr_new)); - iter++; - if (r_new >= Rmax) - continue; - - Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v); - MultiRnl.evaluate(r_new, phi_r); - ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. - const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase; - for (size_t ib = 0; ib < BasisSetSize; ++ib) - psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase; - } - } - } - } - - /** - * @brief evaluate VGL for multiple electrons - * - * This function should only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]]. - * These elements are assumed to be zero when passed to this function. 
- * This function only uses only one center (center_idx) from displ_list - * - * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers] - * @param [in] lattice crystal lattice - * @param [in,out] psi_vgl wavefunction vgl for all electrons [5, nElec, nBasTot] - * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened) - * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened) - * @param [in] nElec number of electrons - * @param [in] nBasTot total number of basis functions represented in psi_vgl - * @param [in] center_idx current center index (for indexing into displ_list) - * @param [in] BasisOffset index of first basis function of this center (for indexing into psi_vgl) - * @param [in] NumCenters total number of centers in system (for indexing into displ_list) - * - */ - - template - inline void mw_evaluateVGL(const RefVectorWithLeader& atom_bs_list, - const LAT& lattice, - Array>& psi_vgl, - const Vector>& displ_list, - const Vector>& Tv_list, - const size_t nElec, - const size_t nBasTot, - const size_t center_idx, - const size_t BasisOffset, - const size_t NumCenters) - { - assert(this == &atom_bs_list.getLeader()); - auto& atom_bs_leader = atom_bs_list.template getCastedLeader>(); - - int Nx = PBCImages[0] + 1; - int Ny = PBCImages[1] + 1; - int Nz = PBCImages[2] + 1; - const int Nxyz = Nx * Ny * Nz; - - assert(psi_vgl.size(0) == 5); - assert(psi_vgl.size(1) == nElec); - assert(psi_vgl.size(2) == nBasTot); - - - auto& ylm_vgl = atom_bs_leader.mw_mem_handle_.getResource().ylm_vgl; - auto& rnl_vgl = atom_bs_leader.mw_mem_handle_.getResource().rnl_vgl; - auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr; - auto& r = atom_bs_leader.mw_mem_handle_.getResource().r; - - size_t nRnl = RnlID.size(); - size_t nYlm = Ylm.size(); - - ylm_vgl.resize(5, nElec, Nxyz, nYlm); - rnl_vgl.resize(3, nElec, Nxyz, nRnl); - dr.resize(nElec, Nxyz, 3); - 
r.resize(nElec, Nxyz); - - - // TODO: move these outside? - auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase; - correctphase.resize(nElec); - - auto* dr_ptr = dr.data(); - auto* r_ptr = r.data(); - - auto* correctphase_ptr = correctphase.data(); - - auto* Tv_list_ptr = Tv_list.data(); - auto* displ_list_ptr = displ_list.data(); - - constexpr RealType cone(1); - constexpr RealType ctwo(2); - - //V,Gx,Gy,Gz,L - auto* restrict psi_ptr = psi_vgl.data_at(0, 0, 0); - auto* restrict dpsi_x_ptr = psi_vgl.data_at(1, 0, 0); - auto* restrict dpsi_y_ptr = psi_vgl.data_at(2, 0, 0); - auto* restrict dpsi_z_ptr = psi_vgl.data_at(3, 0, 0); - auto* restrict d2psi_ptr = psi_vgl.data_at(4, 0, 0); - - { - ScopedTimer local_timer(phase_timer_); -#if not defined(QMC_COMPLEX) - - PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ") - for (size_t i_e = 0; i_e < nElec; i_e++) - correctphase_ptr[i_e] = 1.0; - -#else - auto* SuperTwist_ptr = SuperTwist.data(); - - PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ - Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ") - for (size_t i_e = 0; i_e < nElec; i_e++) - { - //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1); - RealType phasearg = 0; - for (size_t i_dim = 0; i_dim < 3; i_dim++) - phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)]; - RealType s, c; - qmcplusplus::sincos(-phasearg, &s, &c); - correctphase_ptr[i_e] = ValueType(c, s); - } -#endif - } - - { - ScopedTimer local_timer(nelec_pbc_timer_); - auto* periodic_image_displacements_ptr = periodic_image_displacements_.data(); - PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ - map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ - map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ") - for (size_t i_e = 0; i_e < nElec; 
i_e++) - for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) - { - RealType tmp_r2 = 0.0; - for (size_t i_dim = 0; i_dim < 3; i_dim++) - { - dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] + - periodic_image_displacements_ptr[i_dim + 3 * i_xyz]); - tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)]; - } - r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2); - //printf("particle %lu image %d, %lf, %lf\n", i_e, i_xyz, tmp_r2, dr_ptr[3 * (i_xyz + Nxyz * i_e)]); - } - } - - { - ScopedTimer local(rnl_timer_); - MultiRnl.batched_evaluateVGL(r, rnl_vgl, Rmax); - } - - { - ScopedTimer local(ylm_timer_); - Ylm.batched_evaluateVGL(dr, ylm_vgl); - } - - { - ScopedTimer local_timer(psi_timer_); - auto* phase_fac_ptr = periodic_image_phase_factors_.data(); - auto* LM_ptr = LM.data(); - auto* NL_ptr = NL.data(); - const int bset_size = BasisSetSize; - - RealType* restrict phi_ptr = rnl_vgl.data_at(0, 0, 0, 0); - RealType* restrict dphi_ptr = rnl_vgl.data_at(1, 0, 0, 0); - RealType* restrict d2phi_ptr = rnl_vgl.data_at(2, 0, 0, 0); - - - const RealType* restrict ylm_v_ptr = ylm_vgl.data_at(0, 0, 0, 0); //value - const RealType* restrict ylm_x_ptr = ylm_vgl.data_at(1, 0, 0, 0); //gradX - const RealType* restrict ylm_y_ptr = ylm_vgl.data_at(2, 0, 0, 0); //gradY - const RealType* restrict ylm_z_ptr = ylm_vgl.data_at(3, 0, 0, 0); //gradZ - const RealType* restrict ylm_l_ptr = ylm_vgl.data_at(4, 0, 0, 0); //lap - PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ - map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ - map(to:ylm_v_ptr[:nYlm*nElec*Nxyz], ylm_x_ptr[:nYlm*nElec*Nxyz], ylm_y_ptr[:nYlm*nElec*Nxyz], ylm_z_ptr[:nYlm*nElec*Nxyz], ylm_l_ptr[:nYlm*nElec*Nxyz], \ - phi_ptr[:nRnl*nElec*Nxyz], dphi_ptr[:nRnl*nElec*Nxyz], d2phi_ptr[:nRnl*nElec*Nxyz], \ - psi_ptr[:nBasTot*nElec], dpsi_x_ptr[:nBasTot*nElec], dpsi_y_ptr[:nBasTot*nElec], 
dpsi_z_ptr[:nBasTot*nElec], d2psi_ptr[:nBasTot*nElec], \ - correctphase_ptr[:nElec], r_ptr[:nElec*Nxyz], dr_ptr[:3*nElec*Nxyz]) ") - for (int i_e = 0; i_e < nElec; i_e++) - for (int ib = 0; ib < bset_size; ++ib) - { - const int nl(NL_ptr[ib]); - const int lm(LM_ptr[ib]); - VT psi = 0; - VT dpsi_x = 0; - VT dpsi_y = 0; - VT dpsi_z = 0; - VT d2psi = 0; - - for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) - { - const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e]; - const RealType rinv = cone / r_ptr[i_xyz + Nxyz * i_e]; - const RealType x = dr_ptr[0 + 3 * (i_xyz + Nxyz * i_e)]; - const RealType y = dr_ptr[1 + 3 * (i_xyz + Nxyz * i_e)]; - const RealType z = dr_ptr[2 + 3 * (i_xyz + Nxyz * i_e)]; - const RealType drnloverr = rinv * dphi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]; - const RealType ang = ylm_v_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; - const RealType gr_x = drnloverr * x; - const RealType gr_y = drnloverr * y; - const RealType gr_z = drnloverr * z; - const RealType ang_x = ylm_x_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; - const RealType ang_y = ylm_y_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; - const RealType ang_z = ylm_z_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; - const RealType vr = phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]; - - psi += ang * vr * Phase; - dpsi_x += (ang * gr_x + vr * ang_x) * Phase; - dpsi_y += (ang * gr_y + vr * ang_y) * Phase; - dpsi_z += (ang * gr_z + vr * ang_z) * Phase; - d2psi += (ang * (ctwo * drnloverr + d2phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]) + - ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) + - vr * ylm_l_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]) * - Phase; - } - - psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi; - dpsi_x_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_x; - dpsi_y_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_y; - dpsi_z_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_z; - d2psi_ptr[BasisOffset + ib + i_e * nBasTot] = d2psi; - } - } - } - - /** - * @brief evaluate for multiple electrons - * - * This function should 
only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]]. - * These elements are assumed to be zero when passed to this function. - * This function only uses only one center (center_idx) from displ_list - * - * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers] - * @param [in] lattice crystal lattice - * @param [in,out] psi wavefunction values for all electrons [nElec, nBasTot] - * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened) - * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened) - * @param [in] nElec number of electrons - * @param [in] nBasTot total number of basis functions represented in psi - * @param [in] center_idx current center index (for indexing into displ_list) - * @param [in] BasisOffset index of first basis function of this center (for indexing into psi) - * @param [in] NumCenters total number of centers in system (for indexing into displ_list) - * - */ - template - inline void mw_evaluateV(const RefVectorWithLeader& atom_bs_list, - const LAT& lattice, - Array>& psi, - const Vector>& displ_list, - const Vector>& Tv_list, - const size_t nElec, - const size_t nBasTot, - const size_t center_idx, - const size_t BasisOffset, - const size_t NumCenters) - { - assert(this == &atom_bs_list.getLeader()); - auto& atom_bs_leader = atom_bs_list.template getCastedLeader>(); - //TODO: use QMCTraits::DIM instead of 3? 
- // DIM==3 is baked into so many parts here that it's probably not worth it for now - const int Nx = PBCImages[0] + 1; - const int Ny = PBCImages[1] + 1; - const int Nz = PBCImages[2] + 1; - const int Nxyz = Nx * Ny * Nz; - assert(psi.size(0) == nElec); - assert(psi.size(1) == nBasTot); - - - auto& ylm_v = atom_bs_leader.mw_mem_handle_.getResource().ylm_v; - auto& rnl_v = atom_bs_leader.mw_mem_handle_.getResource().rnl_v; - auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr; - auto& r = atom_bs_leader.mw_mem_handle_.getResource().r; - - const size_t nRnl = RnlID.size(); - const size_t nYlm = Ylm.size(); - - ylm_v.resize(nElec, Nxyz, nYlm); - rnl_v.resize(nElec, Nxyz, nRnl); - dr.resize(nElec, Nxyz, 3); - r.resize(nElec, Nxyz); - - // TODO: move these outside? - auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase; - correctphase.resize(nElec); - - auto* dr_ptr = dr.data(); - auto* r_ptr = r.data(); - - auto* correctphase_ptr = correctphase.data(); - - auto* Tv_list_ptr = Tv_list.data(); - auto* displ_list_ptr = displ_list.data(); - - // need to map Tensor vals to device - auto* latR_ptr = lattice.R.data(); - - - { - ScopedTimer local_timer(phase_timer_); -#if not defined(QMC_COMPLEX) - - PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ") - for (size_t i_e = 0; i_e < nElec; i_e++) - correctphase_ptr[i_e] = 1.0; - -#else - auto* SuperTwist_ptr = SuperTwist.data(); - - PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ - Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ") - for (size_t i_e = 0; i_e < nElec; i_e++) - { - //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1); - RealType phasearg = 0; - for (size_t i_dim = 0; i_dim < 3; i_dim++) - phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)]; - RealType s, c; - qmcplusplus::sincos(-phasearg, &s, &c); - 
correctphase_ptr[i_e] = ValueType(c, s); - } -#endif - } - - { - ScopedTimer local_timer(nelec_pbc_timer_); - auto* periodic_image_displacements_ptr = periodic_image_displacements_.data(); - PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ - map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ - map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ") - for (size_t i_e = 0; i_e < nElec; i_e++) - for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) - { - RealType tmp_r2 = 0.0; - for (size_t i_dim = 0; i_dim < 3; i_dim++) - { - dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] + - periodic_image_displacements_ptr[i_dim + 3 * i_xyz]); - tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)]; - } - r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2); - } - } - - - { - ScopedTimer local(rnl_timer_); - MultiRnl.batched_evaluate(r, rnl_v, Rmax); - } - - { - ScopedTimer local(ylm_timer_); - Ylm.batched_evaluateV(dr, ylm_v); - } - - { - ScopedTimer local_timer(psi_timer_); - ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. 
- auto* phase_fac_ptr = periodic_image_phase_factors_.data(); - auto* LM_ptr = LM.data(); - auto* NL_ptr = NL.data(); - auto* psi_ptr = psi.data(); - const int bset_size = BasisSetSize; - - auto* ylm_ptr = ylm_v.data(); - auto* rnl_ptr = rnl_v.data(); - PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ - map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ - map(to:ylm_ptr[:nYlm*nElec*Nxyz], rnl_ptr[:nRnl*nElec*Nxyz], psi_ptr[:nBasTot*nElec], correctphase_ptr[:nElec])") - for (int i_e = 0; i_e < nElec; i_e++) - for (int ib = 0; ib < bset_size; ++ib) - { - VT psi = 0; - for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) - { - const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e]; - psi += ylm_ptr[(i_xyz + Nxyz * i_e) * nYlm + LM_ptr[ib]] * - rnl_ptr[(i_xyz + Nxyz * i_e) * nRnl + NL_ptr[ib]] * Phase; - } - psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi; - } - } - } - - void createResource(ResourceCollection& collection) const - { - collection.addResource(std::make_unique()); - } - - void acquireResource(ResourceCollection& collection, - const RefVectorWithLeader& atom_basis_list) const - { - assert(this == &atom_basis_list.getLeader()); - atom_basis_list.template getCastedLeader().mw_mem_handle_ = - collection.lendResource(); - } - - void releaseResource(ResourceCollection& collection, - const RefVectorWithLeader& atom_basis_list) const - { - assert(this == &atom_basis_list.getLeader()); - collection.takebackResource(atom_basis_list.template getCastedLeader().mw_mem_handle_); - } - -private: - /// multi walker shared memory buffer - struct SoaAtomicBSetMultiWalkerMem : public Resource - { - SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {} - - SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {} - - std::unique_ptr makeClone() const override - { - return std::make_unique(*this); - } - - OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm] - OffloadArray4D 
rnl_vgl; // [5][Nelec][PBC][NRnl] - OffloadArray3D ylm_v; // [Nelec][PBC][NYlm] - OffloadArray3D rnl_v; // [Nelec][PBC][NRnl] - OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image - OffloadArray2D r; // [Nelec][PBC] ion->elec distance for each image - OffloadVector correctphase; // [Nelec] overall phase - }; - - /// multi walker resource handle - ResourceHandle mw_mem_handle_; - ///size of the basis set - int BasisSetSize; - ///Number of Cell images for the evaluation of the orbital with PBC. If No PBC, should be 0; - TinyVector PBCImages; - ///Coordinates of SuperTwist - TinyVector SuperTwist; - ///maximum radius of this center - RealType Rmax; - ///spherical harmonics - SH Ylm; - ///radial orbitals - ROT MultiRnl; - ///container for the quantum-numbers - std::vector RnlID; - ///temporary storage - VectorSoaContainer tempS; - ///Phase Factor array of images - std::shared_ptr periodic_image_phase_factors_ptr_; - ///Displacements of images - std::shared_ptr periodic_image_displacements_ptr_; - ///reference to the phase factor array of images - OffloadVector& periodic_image_phase_factors_; - ///reference to the displacements of images - OffloadArray2D& periodic_image_displacements_; - /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) \f$ */ - const std::shared_ptr NL_ptr_; - ///index of the corresponding real Spherical Harmonic with quantum numbers \f$ (l,m) \f$ - const std::shared_ptr LM_ptr_; - /// reference to NL_ptr_ - OffloadIntVector& NL; - /// reference to LM_ptr_ - OffloadIntVector& LM; - // timers - NewTimer& ylm_timer_; - NewTimer& rnl_timer_; - NewTimer& pbc_timer_; - NewTimer& nelec_pbc_timer_; - NewTimer& phase_timer_; - NewTimer& psi_timer_; - - template - friend class AOBasisBuilder; - template - friend class RadialOrbitalSetBuilder; -}; +using SoaAtomicBasisSet = SoaAtomicBasisSetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h 
b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h new file mode 100644 index 0000000000..c95b539210 --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h @@ -0,0 +1,1149 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +/** @file SoaAtomicBasisSetT.h + */ +#ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H +#define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H + +#include "CPU/math.hpp" +#include "OptimizableObject.h" +#include "ResourceCollection.h" + +namespace qmcplusplus +{ +template +struct CorrectPhaseFunctor +{ + const TinyVector& superTwist; + + template + T operator()(PosType Tv) const + { + return 1.0; + } +}; + +template +struct CorrectPhaseFunctor> +{ + const TinyVector& superTwist; + + template + std::complex operator()(PosType Tv) const + { + T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] + superTwist[2] * Tv[2]; + T s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + return {c, s}; + }; +}; + +/* A basis set for a center type + * + * @tparam ROT : radial function type, e.g.,NGFunctor + * @tparam SH : spherical or carteisan Harmonics for (l,m) expansion + * + * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$ + */ +template +struct SoaAtomicBasisSetT +{ + using RadialOrbital_t = ROT; + using RealType = typename ROT::RealType; + using GridType = typename ROT::GridType; + using ValueType = ORBT; + using OffloadArray4D = Array>; + using OffloadArray3D = Array>; + using OffloadArray2D = Array>; + using OffloadVector = Vector>; + using OffloadIntVector = Vector>; + + ///the constructor + explicit 
SoaAtomicBasisSetT(int lmax, bool addsignforM = false) + : Ylm(lmax, addsignforM), + periodic_image_phase_factors_ptr_(std::make_shared()), + periodic_image_displacements_ptr_(std::make_shared()), + periodic_image_phase_factors_(*periodic_image_phase_factors_ptr_), + periodic_image_displacements_(*periodic_image_displacements_ptr_), + NL_ptr_(std::make_shared()), + LM_ptr_(std::make_shared()), + NL(*NL_ptr_), + LM(*LM_ptr_), + ylm_timer_(createGlobalTimer("SoaAtomicBasisSet::Ylm", timer_level_fine)), + rnl_timer_(createGlobalTimer("SoaAtomicBasisSet::Rnl", timer_level_fine)), + pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::pbc_images", timer_level_fine)), + nelec_pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::nelec_pbc_images", timer_level_fine)), + phase_timer_(createGlobalTimer("SoaAtomicBasisSet::phase", timer_level_fine)), + psi_timer_(createGlobalTimer("SoaAtomicBasisSet::psi", timer_level_fine)) + {} + + void checkInVariables(opt_variables_type& active) + { + // for(size_t nl=0; nlcheckInVariables(active); + } + + void checkOutVariables(const opt_variables_type& active) + { + // for(size_t nl=0; nlcheckOutVariables(active); + } + + void resetParameters(const opt_variables_type& active) + { + // for(size_t nl=0; nlresetParameters(active); + } + + /** return the number of basis functions + */ + inline int getBasisSetSize() const + { + //=NL.size(); + return BasisSetSize; + } + + /** Set the number of periodic image for the evaluation of the orbitals and the phase factor. + * In the case of Non-PBC, PBCImages=(1,1,1), SuperTwist(0,0,0) and the PhaseFactor=1. 
+ */ + void setPBCParams(const TinyVector& pbc_images, + const TinyVector supertwist, + const OffloadVector& PeriodicImagePhaseFactors, + const OffloadArray2D& PeriodicImageDisplacements) + { + PBCImages = pbc_images; + periodic_image_phase_factors_ = PeriodicImagePhaseFactors; + periodic_image_displacements_ = PeriodicImageDisplacements; + SuperTwist = supertwist; + + periodic_image_phase_factors_.updateTo(); + periodic_image_displacements_.updateTo(); + } + + /** implement a BasisSetBase virtual function + * + * Set Rmax and BasisSetSize + * @todo Should be able to overwrite Rmax to be much smaller than the maximum grid + */ + inline void finalize() + { + BasisSetSize = LM.size(); + NL.updateTo(); + LM.updateTo(); + tempS.resize(std::max(Ylm.size(), RnlID.size())); + } + + /** Set Rmax */ + template + inline void setRmax(RealType rmax) + { + Rmax = (rmax > 0) ? rmax : MultiRnl.rmax(); + } + + /// set the current offset + inline void setCenter(int c, int offset) {} + + /// Sets a boolean vector for S-type orbitals. Used for cusp correction. 
+ void queryOrbitalsForSType(std::vector& s_orbitals) const + { + for (int i = 0; i < BasisSetSize; i++) + { + s_orbitals[i] = (RnlID[NL[i]][1] == 0); + } + } + + /** evaluate VGL + */ + template + inline void evaluateVGL(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGL& vgl, PosType Tv) + { + int TransX, TransY, TransZ; + + PosType dr_new; + T r_new; + // T psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new; + +#if not defined(QMC_COMPLEX) + const ValueType correctphase = 1; +#else + + RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2]; + RealType s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + const ValueType correctphase(c, s); +#endif + + constexpr T cone(1); + constexpr T ctwo(2); + + + //one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + //V,Gx,Gy,Gz,L + auto* restrict psi = vgl.data(0) + offset; + const T* restrict ylm_v = Ylm[0]; //value + auto* restrict dpsi_x = vgl.data(1) + offset; + const T* restrict ylm_x = Ylm[1]; //gradX + auto* restrict dpsi_y = vgl.data(2) + offset; + const T* restrict ylm_y = Ylm[2]; //gradY + auto* restrict dpsi_z = vgl.data(3) + offset; + const T* restrict ylm_z = Ylm[3]; //gradZ + auto* restrict d2psi = vgl.data(4) + offset; + const T* restrict ylm_l = Ylm[4]; //lap + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + d2psi[ib] = 0; + } + //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax) + int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
+ TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + + iter++; + if (r_new >= Rmax) + continue; + + //SIGN Change!! + const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGL(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const T rinv = cone / r_new; + + ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. 
+ const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const T drnloverr = rinv * dphi[nl]; + const T ang = ylm_v[lm]; + const T gr_x = drnloverr * x; + const T gr_y = drnloverr * y; + const T gr_z = drnloverr * z; + const T ang_x = ylm_x[lm]; + const T ang_y = ylm_y[lm]; + const T ang_z = ylm_z[lm]; + const T vr = phi[nl]; + + psi[ib] += ang * vr * Phase; + dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase; + dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase; + dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase; + d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) + + vr * ylm_l[lm]) * + Phase; + } + } + } + } + } + + template + inline void evaluateVGH(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGH& vgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + T r_new; + + constexpr T cone(1); + + //one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + + //V,Gx,Gy,Gz,L + auto* restrict psi = vgh.data(0) + offset; + const T* restrict ylm_v = Ylm[0]; //value + auto* restrict dpsi_x = vgh.data(1) + offset; + const T* restrict ylm_x = Ylm[1]; //gradX + auto* restrict dpsi_y = vgh.data(2) + offset; + const T* restrict ylm_y = Ylm[2]; //gradY + auto* restrict dpsi_z = vgh.data(3) + offset; + const T* restrict ylm_z = Ylm[3]; //gradZ + + auto* restrict dhpsi_xx = vgh.data(4) + offset; + const T* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vgh.data(5) + offset; + const T* restrict ylm_xy = Ylm[5]; + auto* restrict dhpsi_xz = vgh.data(6) + offset; + const T* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = vgh.data(7) + offset; + const T* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vgh.data(8) + offset; + const T* restrict ylm_yz = Ylm[8]; + auto* 
restrict dhpsi_zz = vgh.data(9) + offset; + const T* restrict ylm_zz = Ylm[9]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + psi[ib] = 0; + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + // d2psi[ib] = 0; + } + + for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + //const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + //SIGN Change!! 
+ const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi); + + const T rinv = cone / r_new; + + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const T drnloverr = rinv * dphi[nl]; + const T ang = ylm_v[lm]; + const T gr_x = drnloverr * x; + const T gr_y = drnloverr * y; + const T gr_z = drnloverr * z; + + //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r}) + // To save recomputation, I evaluate everything except the x_i*x_j term once, and store it in + // gr2_tmp. The full term is obtained by x_i*x_j*gr2_tmp. + const T gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr); + const T gr_xx = x * x * gr2_tmp + drnloverr; + const T gr_xy = x * y * gr2_tmp; + const T gr_xz = x * z * gr2_tmp; + const T gr_yy = y * y * gr2_tmp + drnloverr; + const T gr_yz = y * z * gr2_tmp; + const T gr_zz = z * z * gr2_tmp + drnloverr; + + const T ang_x = ylm_x[lm]; + const T ang_y = ylm_y[lm]; + const T ang_z = ylm_z[lm]; + const T ang_xx = ylm_xx[lm]; + const T ang_xy = ylm_xy[lm]; + const T ang_xz = ylm_xz[lm]; + const T ang_yy = ylm_yy[lm]; + const T ang_yz = ylm_yz[lm]; + const T ang_zz = ylm_zz[lm]; + + const T vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y + // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y) + dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz 
* ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + } + } + } + } + } + + template + inline void evaluateVGHGH(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGHGH& vghgh) + { + int TransX, TransY, TransZ; + + PosType dr_new; + T r_new; + + constexpr T cone(1); + + //one can assert the alignment + RealType* restrict phi = tempS.data(0); + RealType* restrict dphi = tempS.data(1); + RealType* restrict d2phi = tempS.data(2); + RealType* restrict d3phi = tempS.data(3); + + //V,Gx,Gy,Gz,L + auto* restrict psi = vghgh.data(0) + offset; + const T* restrict ylm_v = Ylm[0]; //value + auto* restrict dpsi_x = vghgh.data(1) + offset; + const T* restrict ylm_x = Ylm[1]; //gradX + auto* restrict dpsi_y = vghgh.data(2) + offset; + const T* restrict ylm_y = Ylm[2]; //gradY + auto* restrict dpsi_z = vghgh.data(3) + offset; + const T* restrict ylm_z = Ylm[3]; //gradZ + + auto* restrict dhpsi_xx = vghgh.data(4) + offset; + const T* restrict ylm_xx = Ylm[4]; + auto* restrict dhpsi_xy = vghgh.data(5) + offset; + const T* restrict ylm_xy = Ylm[5]; + auto* restrict dhpsi_xz = vghgh.data(6) + offset; + const T* restrict ylm_xz = Ylm[6]; + auto* restrict dhpsi_yy = vghgh.data(7) + offset; + const T* restrict ylm_yy = Ylm[7]; + auto* restrict dhpsi_yz = vghgh.data(8) + offset; + const T* restrict ylm_yz = Ylm[8]; + auto* restrict dhpsi_zz = vghgh.data(9) + offset; + const T* restrict ylm_zz = Ylm[9]; + + auto* restrict dghpsi_xxx = vghgh.data(10) + offset; + const T* restrict ylm_xxx = Ylm[10]; + auto* restrict dghpsi_xxy = vghgh.data(11) + offset; + const T* restrict ylm_xxy = Ylm[11]; + auto* restrict dghpsi_xxz = vghgh.data(12) + offset; + const T* restrict ylm_xxz = Ylm[12]; + auto* restrict dghpsi_xyy = vghgh.data(13) + offset; + const T* restrict ylm_xyy = Ylm[13]; + auto* restrict dghpsi_xyz = vghgh.data(14) + offset; + const T* restrict ylm_xyz = Ylm[14]; + auto* restrict dghpsi_xzz = 
vghgh.data(15) + offset; + const T* restrict ylm_xzz = Ylm[15]; + auto* restrict dghpsi_yyy = vghgh.data(16) + offset; + const T* restrict ylm_yyy = Ylm[16]; + auto* restrict dghpsi_yyz = vghgh.data(17) + offset; + const T* restrict ylm_yyz = Ylm[17]; + auto* restrict dghpsi_yzz = vghgh.data(18) + offset; + const T* restrict ylm_yzz = Ylm[18]; + auto* restrict dghpsi_zzz = vghgh.data(19) + offset; + const T* restrict ylm_zzz = Ylm[19]; + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + psi[ib] = 0; + + dpsi_x[ib] = 0; + dpsi_y[ib] = 0; + dpsi_z[ib] = 0; + + dhpsi_xx[ib] = 0; + dhpsi_xy[ib] = 0; + dhpsi_xz[ib] = 0; + dhpsi_yy[ib] = 0; + dhpsi_yz[ib] = 0; + dhpsi_zz[ib] = 0; + + dghpsi_xxx[ib] = 0; + dghpsi_xxy[ib] = 0; + dghpsi_xxz[ib] = 0; + dghpsi_xyy[ib] = 0; + dghpsi_xyz[ib] = 0; + dghpsi_xzz[ib] = 0; + dghpsi_yyy[ib] = 0; + dghpsi_yyz[ib] = 0; + dghpsi_yzz[ib] = 0; + dghpsi_zzz[ib] = 0; + } + + for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0); + dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1); + dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2); + r_new = std::sqrt(dot(dr_new, dr_new)); + + //const size_t ib_max=NL.size(); + if (r_new >= Rmax) + continue; + + //SIGN Change!! 
+ const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2]; + Ylm.evaluateVGHGH(x, y, z); + + MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi); + + const T rinv = cone / r_new; + const T xu = x * rinv, yu = y * rinv, zu = z * rinv; + for (size_t ib = 0; ib < BasisSetSize; ++ib) + { + const int nl(NL[ib]); + const int lm(LM[ib]); + const T drnloverr = rinv * dphi[nl]; + const T ang = ylm_v[lm]; + const T gr_x = drnloverr * x; + const T gr_y = drnloverr * y; + const T gr_z = drnloverr * z; + + //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is + // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r}\right) + // To save recomputation, I evaluate everything except the x_i*(x_j/r) factor once, and store it in + // gr2_tmp. The full term is obtained by x_i*(x_j/r)*gr2_tmp. This is p(r) in the notes. + const T gr2_tmp = rinv * (d2phi[nl] - drnloverr); + + const T gr_xx = x * xu * gr2_tmp + drnloverr; + const T gr_xy = x * yu * gr2_tmp; + const T gr_xz = x * zu * gr2_tmp; + const T gr_yy = y * yu * gr2_tmp + drnloverr; + const T gr_yz = y * zu * gr2_tmp; + const T gr_zz = z * zu * gr2_tmp + drnloverr; + + //This is q(r) in the notes. + const T gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp; + + const T gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu); + const T gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu; + const T gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu; + const T gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu; + const T gr_xyz = xu * yu * zu * gr3_tmp; + const T gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu; + const T gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu); + const T gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu; + const T gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu; + const T gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. 
* zu); + + + //Angular derivatives up to third + const T ang_x = ylm_x[lm]; + const T ang_y = ylm_y[lm]; + const T ang_z = ylm_z[lm]; + + const T ang_xx = ylm_xx[lm]; + const T ang_xy = ylm_xy[lm]; + const T ang_xz = ylm_xz[lm]; + const T ang_yy = ylm_yy[lm]; + const T ang_yz = ylm_yz[lm]; + const T ang_zz = ylm_zz[lm]; + + const T ang_xxx = ylm_xxx[lm]; + const T ang_xxy = ylm_xxy[lm]; + const T ang_xxz = ylm_xxz[lm]; + const T ang_xyy = ylm_xyy[lm]; + const T ang_xyz = ylm_xyz[lm]; + const T ang_xzz = ylm_xzz[lm]; + const T ang_yyy = ylm_yyy[lm]; + const T ang_yyz = ylm_yyz[lm]; + const T ang_yzz = ylm_yzz[lm]; + const T ang_zzz = ylm_zzz[lm]; + + const T vr = phi[nl]; + + psi[ib] += ang * vr; + dpsi_x[ib] += ang * gr_x + vr * ang_x; + dpsi_y[ib] += ang * gr_y + vr * ang_y; + dpsi_z[ib] += ang * gr_z + vr * ang_z; + + + // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y + // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y) + dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x; + dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x; + dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x; + dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y; + dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y; + dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z; + + dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx; + dghpsi_xxy[ib] += + gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + 2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x; + dghpsi_xxz[ib] += + gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x; + dghpsi_xyy[ib] += + gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y; + dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x + + ang_yz * 
gr_x + gr_xz * ang_y + ang_xz * gr_y; + dghpsi_xzz[ib] += + gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z; + dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy; + dghpsi_yyz[ib] += + gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y; + dghpsi_yzz[ib] += + gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z; + dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz; + } + } + } + } + } + + /** evaluate V + */ + template + inline void evaluateV(const LAT& lattice, const T r, const PosType& dr, VT* restrict psi, PosType Tv) + { + int TransX, TransY, TransZ; + + PosType dr_new; + T r_new; + +#if not defined(QMC_COMPLEX) + const ValueType correctphase = 1.0; +#else + + RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2]; + RealType s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + const ValueType correctphase(c, s); + +#endif + + RealType* restrict ylm_v = tempS.data(0); + RealType* restrict phi_r = tempS.data(1); + + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] = 0; + //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax) + int iter = -1; + for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2); + for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... + TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2); + for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z + { + //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc... 
+ TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2); + + dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0)); + dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1)); + dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2)); + + r_new = std::sqrt(dot(dr_new, dr_new)); + iter++; + if (r_new >= Rmax) + continue; + + Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v); + MultiRnl.evaluate(r_new, phi_r); + ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. + const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase; + for (size_t ib = 0; ib < BasisSetSize; ++ib) + psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase; + } + } + } + } + + /** + * @brief evaluate VGL for multiple electrons + * + * This function should only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]]. + * These elements are assumed to be zero when passed to this function. 
+ * This function uses only one center (center_idx) from displ_list and Tv_list + * + * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers] + * @param [in] lattice crystal lattice (not referenced in the body of this function) + * @param [in,out] psi_vgl wavefunction vgl for all electrons [5, nElec, nBasTot] + * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened) + * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened) + * @param [in] nElec number of electrons + * @param [in] nBasTot total number of basis functions represented in psi_vgl + * @param [in] center_idx current center index (for indexing into displ_list and Tv_list) + * @param [in] BasisOffset index of first basis function of this center (for indexing into psi_vgl) + * @param [in] NumCenters total number of centers in system (not referenced in the body of this function) + * + */ + + template + inline void mw_evaluateVGL(const RefVectorWithLeader& atom_bs_list, + const LAT& lattice, + Array>& psi_vgl, + const Vector>& displ_list, + const Vector>& Tv_list, + const size_t nElec, + const size_t nBasTot, + const size_t center_idx, + const size_t BasisOffset, + const size_t NumCenters) + { + assert(this == &atom_bs_list.getLeader()); + auto& atom_bs_leader = atom_bs_list.template getCastedLeader>(); + + int Nx = PBCImages[0] + 1; + int Ny = PBCImages[1] + 1; + int Nz = PBCImages[2] + 1; + const int Nxyz = Nx * Ny * Nz; + + assert(psi_vgl.size(0) == 5); + assert(psi_vgl.size(1) == nElec); + assert(psi_vgl.size(2) == nBasTot); + + + auto& ylm_vgl = atom_bs_leader.mw_mem_handle_.getResource().ylm_vgl; + auto& rnl_vgl = atom_bs_leader.mw_mem_handle_.getResource().rnl_vgl; + auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr; + auto& r = atom_bs_leader.mw_mem_handle_.getResource().r; + + size_t nRnl = RnlID.size(); + size_t nYlm = Ylm.size(); + + ylm_vgl.resize(5, nElec, Nxyz, nYlm); + rnl_vgl.resize(3, nElec, Nxyz, nRnl); + dr.resize(nElec, Nxyz, 3); + 
r.resize(nElec, Nxyz); + + + // TODO: move these outside? + auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase; + correctphase.resize(nElec); + + auto* dr_ptr = dr.data(); + auto* r_ptr = r.data(); + + auto* correctphase_ptr = correctphase.data(); + + auto* Tv_list_ptr = Tv_list.data(); + auto* displ_list_ptr = displ_list.data(); + + constexpr RealType cone(1); + constexpr RealType ctwo(2); + + //V,Gx,Gy,Gz,L + auto* restrict psi_ptr = psi_vgl.data_at(0, 0, 0); + auto* restrict dpsi_x_ptr = psi_vgl.data_at(1, 0, 0); + auto* restrict dpsi_y_ptr = psi_vgl.data_at(2, 0, 0); + auto* restrict dpsi_z_ptr = psi_vgl.data_at(3, 0, 0); + auto* restrict d2psi_ptr = psi_vgl.data_at(4, 0, 0); + + { + ScopedTimer local_timer(phase_timer_); +#if not defined(QMC_COMPLEX) + + PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ") + for (size_t i_e = 0; i_e < nElec; i_e++) + correctphase_ptr[i_e] = 1.0; + +#else + auto* SuperTwist_ptr = SuperTwist.data(); + + PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ + Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ") + for (size_t i_e = 0; i_e < nElec; i_e++) + { + //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1); + RealType phasearg = 0; + for (size_t i_dim = 0; i_dim < 3; i_dim++) + phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)]; + RealType s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + correctphase_ptr[i_e] = ValueType(c, s); + } +#endif + } + + { + ScopedTimer local_timer(nelec_pbc_timer_); + auto* periodic_image_displacements_ptr = periodic_image_displacements_.data(); + PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ + map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ + map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ") + for (size_t i_e = 0; i_e < nElec; 
i_e++) + for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) + { + RealType tmp_r2 = 0.0; + for (size_t i_dim = 0; i_dim < 3; i_dim++) + { + dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] + + periodic_image_displacements_ptr[i_dim + 3 * i_xyz]); + tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)]; + } + r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2); + //printf("particle %lu image %d, %lf, %lf\n", i_e, i_xyz, tmp_r2, dr_ptr[3 * (i_xyz + Nxyz * i_e)]); + } + } + + { + ScopedTimer local(rnl_timer_); + MultiRnl.batched_evaluateVGL(r, rnl_vgl, Rmax); + } + + { + ScopedTimer local(ylm_timer_); + Ylm.batched_evaluateVGL(dr, ylm_vgl); + } + + { + ScopedTimer local_timer(psi_timer_); + auto* phase_fac_ptr = periodic_image_phase_factors_.data(); + auto* LM_ptr = LM.data(); + auto* NL_ptr = NL.data(); + const int bset_size = BasisSetSize; + + RealType* restrict phi_ptr = rnl_vgl.data_at(0, 0, 0, 0); + RealType* restrict dphi_ptr = rnl_vgl.data_at(1, 0, 0, 0); + RealType* restrict d2phi_ptr = rnl_vgl.data_at(2, 0, 0, 0); + + + const RealType* restrict ylm_v_ptr = ylm_vgl.data_at(0, 0, 0, 0); //value + const RealType* restrict ylm_x_ptr = ylm_vgl.data_at(1, 0, 0, 0); //gradX + const RealType* restrict ylm_y_ptr = ylm_vgl.data_at(2, 0, 0, 0); //gradY + const RealType* restrict ylm_z_ptr = ylm_vgl.data_at(3, 0, 0, 0); //gradZ + const RealType* restrict ylm_l_ptr = ylm_vgl.data_at(4, 0, 0, 0); //lap + PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ + map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ + map(to:ylm_v_ptr[:nYlm*nElec*Nxyz], ylm_x_ptr[:nYlm*nElec*Nxyz], ylm_y_ptr[:nYlm*nElec*Nxyz], ylm_z_ptr[:nYlm*nElec*Nxyz], ylm_l_ptr[:nYlm*nElec*Nxyz], \ + phi_ptr[:nRnl*nElec*Nxyz], dphi_ptr[:nRnl*nElec*Nxyz], d2phi_ptr[:nRnl*nElec*Nxyz], \ + psi_ptr[:nBasTot*nElec], dpsi_x_ptr[:nBasTot*nElec], dpsi_y_ptr[:nBasTot*nElec], 
dpsi_z_ptr[:nBasTot*nElec], d2psi_ptr[:nBasTot*nElec], \ + correctphase_ptr[:nElec], r_ptr[:nElec*Nxyz], dr_ptr[:3*nElec*Nxyz]) ") + for (int i_e = 0; i_e < nElec; i_e++) + for (int ib = 0; ib < bset_size; ++ib) + { + const int nl(NL_ptr[ib]); + const int lm(LM_ptr[ib]); + VT psi = 0; + VT dpsi_x = 0; + VT dpsi_y = 0; + VT dpsi_z = 0; + VT d2psi = 0; + + for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) + { + const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e]; + const RealType rinv = cone / r_ptr[i_xyz + Nxyz * i_e]; + const RealType x = dr_ptr[0 + 3 * (i_xyz + Nxyz * i_e)]; + const RealType y = dr_ptr[1 + 3 * (i_xyz + Nxyz * i_e)]; + const RealType z = dr_ptr[2 + 3 * (i_xyz + Nxyz * i_e)]; + const RealType drnloverr = rinv * dphi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]; + const RealType ang = ylm_v_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; + const RealType gr_x = drnloverr * x; + const RealType gr_y = drnloverr * y; + const RealType gr_z = drnloverr * z; + const RealType ang_x = ylm_x_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; + const RealType ang_y = ylm_y_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; + const RealType ang_z = ylm_z_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]; + const RealType vr = phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]; + + psi += ang * vr * Phase; + dpsi_x += (ang * gr_x + vr * ang_x) * Phase; + dpsi_y += (ang * gr_y + vr * ang_y) * Phase; + dpsi_z += (ang * gr_z + vr * ang_z) * Phase; + d2psi += (ang * (ctwo * drnloverr + d2phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]) + + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) + + vr * ylm_l_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]) * + Phase; + } + + psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi; + dpsi_x_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_x; + dpsi_y_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_y; + dpsi_z_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_z; + d2psi_ptr[BasisOffset + ib + i_e * nBasTot] = d2psi; + } + } + } + + /** + * @brief evaluate for multiple electrons + * + * This function should 
only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]]. + * These elements are assumed to be zero when passed to this function. + * This function uses only one center (center_idx) from displ_list and Tv_list + * + * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers] + * @param [in] lattice crystal lattice (only lattice.R.data() is taken; that pointer is not used afterwards in this function) + * @param [in,out] psi wavefunction values for all electrons [nElec, nBasTot] + * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened) + * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened) + * @param [in] nElec number of electrons + * @param [in] nBasTot total number of basis functions represented in psi + * @param [in] center_idx current center index (for indexing into displ_list and Tv_list) + * @param [in] BasisOffset index of first basis function of this center (for indexing into psi) + * @param [in] NumCenters total number of centers in system (not referenced in the body of this function) + * + */ + template + inline void mw_evaluateV(const RefVectorWithLeader& atom_bs_list, + const LAT& lattice, + Array>& psi, + const Vector>& displ_list, + const Vector>& Tv_list, + const size_t nElec, + const size_t nBasTot, + const size_t center_idx, + const size_t BasisOffset, + const size_t NumCenters) + { + assert(this == &atom_bs_list.getLeader()); + auto& atom_bs_leader = atom_bs_list.template getCastedLeader>(); + //TODO: use QMCTraits::DIM instead of 3? 
+ // DIM==3 is baked into so many parts here that it's probably not worth it for now + const int Nx = PBCImages[0] + 1; + const int Ny = PBCImages[1] + 1; + const int Nz = PBCImages[2] + 1; + const int Nxyz = Nx * Ny * Nz; + assert(psi.size(0) == nElec); + assert(psi.size(1) == nBasTot); + + + auto& ylm_v = atom_bs_leader.mw_mem_handle_.getResource().ylm_v; + auto& rnl_v = atom_bs_leader.mw_mem_handle_.getResource().rnl_v; + auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr; + auto& r = atom_bs_leader.mw_mem_handle_.getResource().r; + + const size_t nRnl = RnlID.size(); + const size_t nYlm = Ylm.size(); + + ylm_v.resize(nElec, Nxyz, nYlm); + rnl_v.resize(nElec, Nxyz, nRnl); + dr.resize(nElec, Nxyz, 3); + r.resize(nElec, Nxyz); + + // TODO: move these outside? + auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase; + correctphase.resize(nElec); + + auto* dr_ptr = dr.data(); + auto* r_ptr = r.data(); + + auto* correctphase_ptr = correctphase.data(); + + auto* Tv_list_ptr = Tv_list.data(); + auto* displ_list_ptr = displ_list.data(); + + // need to map Tensor vals to device + auto* latR_ptr = lattice.R.data(); + + + { + ScopedTimer local_timer(phase_timer_); +#if not defined(QMC_COMPLEX) + + PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ") + for (size_t i_e = 0; i_e < nElec; i_e++) + correctphase_ptr[i_e] = 1.0; + +#else + auto* SuperTwist_ptr = SuperTwist.data(); + + PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ + Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ") + for (size_t i_e = 0; i_e < nElec; i_e++) + { + //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1); + RealType phasearg = 0; + for (size_t i_dim = 0; i_dim < 3; i_dim++) + phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)]; + RealType s, c; + qmcplusplus::sincos(-phasearg, &s, &c); + 
correctphase_ptr[i_e] = ValueType(c, s); + } +#endif + } + + { + ScopedTimer local_timer(nelec_pbc_timer_); + auto* periodic_image_displacements_ptr = periodic_image_displacements_.data(); + PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ + map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ + map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ") + for (size_t i_e = 0; i_e < nElec; i_e++) + for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) + { + RealType tmp_r2 = 0.0; + for (size_t i_dim = 0; i_dim < 3; i_dim++) + { + dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] + + periodic_image_displacements_ptr[i_dim + 3 * i_xyz]); + tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)]; + } + r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2); + } + } + + + { + ScopedTimer local(rnl_timer_); + MultiRnl.batched_evaluate(r, rnl_v, Rmax); + } + + { + ScopedTimer local(ylm_timer_); + Ylm.batched_evaluateV(dr, ylm_v); + } + + { + ScopedTimer local_timer(psi_timer_); + ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table. 
+ auto* phase_fac_ptr = periodic_image_phase_factors_.data(); + auto* LM_ptr = LM.data(); + auto* NL_ptr = NL.data(); + auto* psi_ptr = psi.data(); + const int bset_size = BasisSetSize; + + auto* ylm_ptr = ylm_v.data(); + auto* rnl_ptr = rnl_v.data(); + PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \ + map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ + map(to:ylm_ptr[:nYlm*nElec*Nxyz], rnl_ptr[:nRnl*nElec*Nxyz], psi_ptr[:nBasTot*nElec], correctphase_ptr[:nElec])") + for (int i_e = 0; i_e < nElec; i_e++) + for (int ib = 0; ib < bset_size; ++ib) + { + VT psi = 0; + for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++) + { + const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e]; + psi += ylm_ptr[(i_xyz + Nxyz * i_e) * nYlm + LM_ptr[ib]] * + rnl_ptr[(i_xyz + Nxyz * i_e) * nRnl + NL_ptr[ib]] * Phase; + } + psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi; + } + } + } + + void createResource(ResourceCollection& collection) const + { + collection.addResource(std::make_unique()); + } + + void acquireResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + atom_basis_list.template getCastedLeader().mw_mem_handle_ = + collection.lendResource(); + } + + void releaseResource(ResourceCollection& collection, + const RefVectorWithLeader& atom_basis_list) const + { + assert(this == &atom_basis_list.getLeader()); + collection.takebackResource(atom_basis_list.template getCastedLeader().mw_mem_handle_); + } + +private: + /// multi walker shared memory buffer + struct SoaAtomicBSetMultiWalkerMem : public Resource + { + SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {} + + SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {} + + std::unique_ptr makeClone() const override + { + return std::make_unique(*this); + } + + OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm] + OffloadArray4D 
rnl_vgl; // [3][Nelec][PBC][NRnl] (radial value, 1st and 2nd derivative) + OffloadArray3D ylm_v; // [Nelec][PBC][NYlm] + OffloadArray3D rnl_v; // [Nelec][PBC][NRnl] + OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image + OffloadArray2D r; // [Nelec][PBC] ion->elec distance for each image + OffloadVector correctphase; // [Nelec] overall phase + }; + + /// multi walker resource handle + ResourceHandle mw_mem_handle_; + ///size of the basis set + int BasisSetSize; + ///Number of Cell images for the evaluation of the orbital with PBC. If No PBC, should be 0; + TinyVector PBCImages; + ///Coordinates of SuperTwist + TinyVector SuperTwist; + ///maximum radius of this center + RealType Rmax; + ///spherical harmonics + SH Ylm; + ///radial orbitals + ROT MultiRnl; + ///container for the quantum-numbers + std::vector RnlID; + ///temporary storage + VectorSoaContainer tempS; + ///Phase Factor array of images + std::shared_ptr periodic_image_phase_factors_ptr_; + ///Displacements of images + std::shared_ptr periodic_image_displacements_ptr_; + ///reference to the phase factor array of images + OffloadVector& periodic_image_phase_factors_; + ///reference to the displacements of images + OffloadArray2D& periodic_image_displacements_; + /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) \f$ */ + const std::shared_ptr NL_ptr_; + ///index of the corresponding real Spherical Harmonic with quantum numbers \f$ (l,m) \f$ + const std::shared_ptr LM_ptr_; + /// reference to NL_ptr_ + OffloadIntVector& NL; + /// reference to LM_ptr_ + OffloadIntVector& LM; + // timers + NewTimer& ylm_timer_; + NewTimer& rnl_timer_; + NewTimer& pbc_timer_; + NewTimer& nelec_pbc_timer_; + NewTimer& phase_timer_; + NewTimer& psi_timer_; + + template + friend class AOBasisBuilderT; + template + friend class RadialOrbitalSetBuilder; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp similarity 
index 71% rename from src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp rename to src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp index 0b41857fb9..c377efb09f 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp @@ -9,30 +9,36 @@ // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. ////////////////////////////////////////////////////////////////////////////////////// - -/** @file SoaCuspCorrection.cpp +/** @file SoaCuspCorrectionT.cpp */ -#include "SoaCuspCorrection.h" +#include "SoaCuspCorrectionT.h" + #include "SoaCuspCorrectionBasisSet.h" +#include "Particle/DistanceTableT.h" namespace qmcplusplus { -SoaCuspCorrection::SoaCuspCorrection(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions)) +template +SoaCuspCorrectionT::SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els) + : myTableIndex(els.addTable(ions)) { NumCenters = ions.getTotalNum(); NumTargets = els.getTotalNum(); LOBasisSet.resize(NumCenters); } -SoaCuspCorrection::SoaCuspCorrection(const SoaCuspCorrection& a) = default; +template +SoaCuspCorrectionT::SoaCuspCorrectionT(const SoaCuspCorrectionT& a) = default; -void SoaCuspCorrection::setOrbitalSetSize(int norbs) +template +void SoaCuspCorrectionT::setOrbitalSetSize(int norbs) { MaxOrbSize = norbs; myVGL.resize(5, MaxOrbSize); } -inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl) +template +inline void SoaCuspCorrectionT::evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl) { assert(MaxOrbSize >= vgl.size()); myVGL = 0.0; @@ -66,11 +72,12 @@ inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVec } } -void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi) +template +void SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { 
assert(MaxOrbSize >= psi.size()); myVGL = 0.0; @@ -97,12 +104,13 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, } } -void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, - int iat, - int idx, - ValueMatrix& psi, - GradMatrix& dpsi, - ValueMatrix& d2psi) +template +void SoaCuspCorrectionT::evaluate_vgl(const ParticleSetT& P, + int iat, + int idx, + ValueMatrix& psi, + GradMatrix& dpsi, + ValueMatrix& d2psi) { assert(MaxOrbSize >= psi.cols()); myVGL = 0.0; @@ -129,32 +137,37 @@ void SoaCuspCorrection::evaluate_vgl(const ParticleSet& P, } } -void SoaCuspCorrection::evaluateV(const ParticleSet& P, int iat, ValueVector& psi) +template +void SoaCuspCorrectionT::evaluateV(const ParticleSetT& P, int iat, ValueVector& psi) { assert(MaxOrbSize >= psi.size()); - ValueType* tmp_vals = myVGL[0]; + T* tmp_vals = myVGL[0]; std::fill_n(tmp_vals, myVGL.size(), 0.0); const auto& d_table = P.getDistTableAB(myTableIndex); const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat); - //THIS IS SERIAL, only way to avoid this is to use myVGL + // THIS IS SERIAL, only way to avoid this is to use myVGL for (int c = 0; c < NumCenters; c++) if (LOBasisSet[c]) LOBasisSet[c]->evaluate(dist[c], tmp_vals); - { //collect + { // collect const auto v_in = myVGL[0]; for (size_t i = 0; i < psi.size(); ++i) psi[i] += v_in[i]; } } -void SoaCuspCorrection::add(int icenter, std::unique_ptr aos) +template +void SoaCuspCorrectionT::add(int icenter, std::unique_ptr aos) { assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!"); LOBasisSet[icenter].reset(aos.release()); } +template class SoaCuspCorrectionT; +template class SoaCuspCorrectionT; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h new file mode 100644 index 0000000000..f7ed8c7b2e --- /dev/null +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h @@ -0,0 
+1,117 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2021 QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +////////////////////////////////////////////////////////////////////////////////////// + +/** @file SoaCuspCorrectionT.h + */ +#ifndef QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H +#define QMCPLUSPLUS_SOA_CUSPCORRECTIONT_H + +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +class CuspCorrectionAtomicBasis; + +/** Cusp correction evaluated from one CuspCorrectionAtomicBasis per center + * + * This class performs the evaluation of the correction functions and their + * derivatives for a given particle (iat) of a particle set. + * COT (Centered-Orbital-Type) is the CuspCorrectionAtomicBasis alias declared below; + * it provides a set of localized orbitals associated with a center. + */ +template +class SoaCuspCorrectionT +{ + using RealType = typename SPOSetT::RealType; + using VGLVector = VectorSoaContainer; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradVector = typename SPOSetT::GradVector; + using ValueVector = typename SPOSetT::ValueVector; + using PosType = typename SPOSetT::PosType; + + /// number of centers, e.g., ions + size_t NumCenters; + /// number of quantum particles + size_t NumTargets; + /// index of the ion-electron distance table, as returned by els.addTable(ions) + const int myTableIndex; + /** Maximal number of supported MOs + * this is not the AO basis because cusp correction is applied on the MO + * directly. 
+ */ + int MaxOrbSize = 0; + + /// COMPLEX WON'T WORK + using COT = CuspCorrectionAtomicBasis; + + /** container of the unique pointers to the Atomic Orbitals + * + * size of LOBasisSet = number of centers (atoms) + * should use unique_ptr once COT is fixed for better performance + */ + std::vector> LOBasisSet; + + Matrix myVGL; + +public: + /** constructor + * @param ions ionic system + * @param els electronic system + */ + SoaCuspCorrectionT(ParticleSetT& ions, ParticleSetT& els); + + /** copy constructor */ + SoaCuspCorrectionT(const SoaCuspCorrectionT& a); + + /** set the number of orbitals this cusp correction may serve. call this + * before adding any correction centers. + */ + void setOrbitalSetSize(int norbs); + + /** compute VGL + * @param P quantum particleset + * @param iat active particle + * @param vgl Matrix(5,BasisSetSize) + * NOTE(review): a trialMove parameter used to be documented here but is not part of this signature + */ + void evaluateVGL(const ParticleSetT& P, int iat, VGLVector& vgl); + + void evaluate_vgl(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi); + + void evaluate_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi); + + /** compute values for the iat-particle move and add them to psi + * + * Uses getTempDists() when iat is the active particle, getDistRow(iat) otherwise + */ + void evaluateV(const ParticleSetT& P, int iat, ValueVector& psi); + + /** add a new set of Centered Atomic Orbitals + * @param icenter the index of the center + * @param aos a set of Centered Atomic Orbitals + */ + void add(int icenter, std::unique_ptr aos); + + void addVGL(const ParticleSetT& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); } + void addV(const ParticleSetT& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); } + void add_vgl(const ParticleSetT& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi) + { + evaluate_vgl(P, iat, idx, vals, dpsi, d2psi); + } + void add_vector_vgl(const ParticleSetT& P, int iat, 
ValueVector& vals, GradVector& dpsi, ValueVector& d2psi) + { + evaluate_vgl(P, iat, vals, dpsi, d2psi); + } +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp index eb4ab81f3c..e5a8e8af7c 100644 --- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp @@ -93,7 +93,7 @@ RefVectorWithLeader SoaLocalizedBasisSet::extractOneSpeciesBasis template -SoaLocalizedBasisSet::SoaLocalizedBasisSet(ParticleSet& ions, ParticleSet& els) +SoaLocalizedBasisSet::SoaLocalizedBasisSet(ParticleSetT& ions, ParticleSetT& els) : ions_(ions), myTableIndex(els.addTable(ions, DTModes::NEED_FULL_TABLE_ANYTIME | DTModes::NEED_VP_FULL_TABLE_ON_HOST)), SuperTwist(0.0) @@ -193,7 +193,7 @@ void SoaLocalizedBasisSet::queryOrbitalsForSType(const std::vector -void SoaLocalizedBasisSet::evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) +void SoaLocalizedBasisSet::evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) { const auto& IonID(ions_.GroupID); const auto& coordR = P.activeR(iat); @@ -213,7 +213,7 @@ void SoaLocalizedBasisSet::evaluateVGL(const ParticleSet& P, int iat, template void SoaLocalizedBasisSet::mw_evaluateVGL(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& vgl_v) { @@ -259,7 +259,7 @@ void SoaLocalizedBasisSet::mw_evaluateVGL(const RefVectorWithLeader -void SoaLocalizedBasisSet::evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) +void SoaLocalizedBasisSet::evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) { const auto& IonID(ions_.GroupID); const auto& d_table = P.getDistTableAB(myTableIndex); @@ -272,7 +272,7 @@ void SoaLocalizedBasisSet::evaluateVGH(const ParticleSet& P, int iat, } template -void SoaLocalizedBasisSet::evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) +void 
SoaLocalizedBasisSet::evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) { // APP_ABORT("SoaLocalizedBasisSet::evaluateVGH() not implemented\n"); @@ -289,7 +289,7 @@ void SoaLocalizedBasisSet::evaluateVGHGH(const ParticleSet& P, int ia template void SoaLocalizedBasisSet::mw_evaluateValueVPs(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& vp_list, OffloadMWVArray& vp_basis_v) { assert(this == &basis_list.getLeader()); @@ -340,7 +340,7 @@ void SoaLocalizedBasisSet::mw_evaluateValueVPs(const RefVectorWithLea // vp_basis_v.updateFrom(); } template -void SoaLocalizedBasisSet::evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) +void SoaLocalizedBasisSet::evaluateV(const ParticleSetT& P, int iat, ORBT* restrict vals) { const auto& IonID(ions_.GroupID); const auto& coordR = P.activeR(iat); @@ -360,7 +360,7 @@ void SoaLocalizedBasisSet::evaluateV(const ParticleSet& P, int iat, O template void SoaLocalizedBasisSet::mw_evaluateValue(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& vals) { @@ -406,9 +406,9 @@ void SoaLocalizedBasisSet::mw_evaluateValue(const RefVectorWithLeader template -void SoaLocalizedBasisSet::evaluateGradSourceV(const ParticleSet& P, +void SoaLocalizedBasisSet::evaluateGradSourceV(const ParticleSetT& P, int iat, - const ParticleSet& ions, + const ParticleSetT& ions, int jion, vgl_type& vgl) { @@ -439,9 +439,9 @@ void SoaLocalizedBasisSet::evaluateGradSourceV(const ParticleSet& P, } template -void SoaLocalizedBasisSet::evaluateGradSourceVGL(const ParticleSet& P, +void SoaLocalizedBasisSet::evaluateGradSourceVGL(const ParticleSetT& P, int iat, - const ParticleSet& ions, + const ParticleSetT& ions, int jion, vghgh_type& vghgh) { diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h index 5d384fc99b..3a3c614e27 100644 --- 
a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h +++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h @@ -44,7 +44,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase using vgl_type = typename BaseType::vgl_type; using vgh_type = typename BaseType::vgh_type; using vghgh_type = typename BaseType::vghgh_type; - using PosType = typename ParticleSet::PosType; + using PosType = typename ParticleSetT::PosType; using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] using OffloadMWVArray = Array>; // [walker, Orbs] @@ -55,7 +55,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase ///number of quantum particles size_t NumTargets; ///ion particle set - const ParticleSet& ions_; + const ParticleSetT& ions_; ///number of quantum particles const int myTableIndex; ///Global Coordinate of Supertwist read from HDF5 @@ -77,7 +77,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param ions ionic system * @param els electronic system */ - SoaLocalizedBasisSet(ParticleSet& ions, ParticleSet& els); + SoaLocalizedBasisSet(ParticleSetT& ions, ParticleSetT& els); /** copy constructor */ SoaLocalizedBasisSet(const SoaLocalizedBasisSet& a); @@ -108,7 +108,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param vgl Matrix(5,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) override; + void evaluateVGL(const ParticleSetT& P, int iat, vgl_type& vgl) override; /** compute V using packed array with all walkers * @param basis_list list of basis sets (one for each walker) @@ -117,7 +117,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param v Array(n_walkers, BasisSetSize) */ void mw_evaluateValue(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVArray& v) override; @@ -127,7 +127,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param v Array(n_walkers, 
BasisSetSize) */ void mw_evaluateValueVPs(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& vp_list, + const RefVectorWithLeader>& vp_list, OffloadMWVArray& v) override; @@ -138,7 +138,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param vgl Array(n_walkers, 5, BasisSetSize) */ void mw_evaluateVGL(const RefVectorWithLeader>& basis_list, - const RefVectorWithLeader& P_list, + const RefVectorWithLeader>& P_list, int iat, OffloadMWVGLArray& vgl) override; @@ -148,7 +148,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param vgl Matrix(10,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) override; + void evaluateVGH(const ParticleSetT& P, int iat, vgh_type& vgh) override; /** compute VGHGH * @param P quantum particleset @@ -156,7 +156,7 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * @param vghgh Matrix(20,BasisSetSize) * @param trialMove if true, use getTempDists()/getTempDispls() */ - void evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) override; + void evaluateVGHGH(const ParticleSetT& P, int iat, vghgh_type& vghgh) override; /** compute values for the iat-paricle move * @@ -169,13 +169,13 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase * displacement. We need to keep track of Tv because it must be add * as a phase factor, i.e., exp(i*k*Tv). 
*/ - void evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) override; + void evaluateV(const ParticleSetT& P, int iat, ORBT* restrict vals) override; - void evaluateGradSourceV(const ParticleSet& P, int iat, const ParticleSet& ions, int jion, vgl_type& vgl) override; + void evaluateGradSourceV(const ParticleSetT& P, int iat, const ParticleSetT& ions, int jion, vgl_type& vgl) override; - void evaluateGradSourceVGL(const ParticleSet& P, + void evaluateGradSourceVGL(const ParticleSetT& P, int iat, - const ParticleSet& ions, + const ParticleSetT& ions, int jion, vghgh_type& vghgh) override; diff --git a/src/QMCWaveFunctions/OptimizableFunctorBase.h b/src/QMCWaveFunctions/OptimizableFunctorBase.h index 33048b3049..01c470e9a2 100644 --- a/src/QMCWaveFunctions/OptimizableFunctorBase.h +++ b/src/QMCWaveFunctions/OptimizableFunctorBase.h @@ -22,7 +22,7 @@ #include "OptimizableObject.h" #include "OhmmsData/OhmmsElementBase.h" #include "OhmmsPETE/TinyVector.h" -//#include +#include "QMCWaveFunctions/VariableSet.h" #include namespace qmcplusplus @@ -48,8 +48,6 @@ struct OptimizableFunctorBase : public OptimizableObject { ///typedef for real values using real_type = optimize::VariableSet::real_type; - ///typedef for variableset: this is going to be replaced - using opt_variables_type = optimize::VariableSet; ///maximum cutoff real_type cutoff_radius = 0.0; ///set of variables to be optimized diff --git a/src/QMCWaveFunctions/OptimizableObject.h b/src/QMCWaveFunctions/OptimizableObject.h index 17cf4af88e..2033e89fc9 100644 --- a/src/QMCWaveFunctions/OptimizableObject.h +++ b/src/QMCWaveFunctions/OptimizableObject.h @@ -13,103 +13,21 @@ #ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECT_H #define QMCPLUSPLUS_OPTIMIZABLEOBJECT_H -#include "VariableSet.h" -#include "type_traits/template_types.hpp" +#include "Configuration.h" +#include "OptimizableObjectT.h" /**@file OptimizableObject.h *@brief Declaration of OptimizableObject */ namespace qmcplusplus { -using opt_variables_type = 
optimize::VariableSet; +using opt_variables_type = OptVariablesTypeT; -class OptimizableObject -{ -public: - OptimizableObject(const std::string& name) : name_(name) {} - - const std::string& getName() const { return name_; } - bool isOptimized() const { return is_optimized_; } - -private: - /** Name of the optimizable object - */ - const std::string name_; - /** If true, this object is actively modified during WFOpt - */ - bool is_optimized_ = false; - -public: - /** check in variational parameters to the global list of parameters used by the optimizer. - * @param active a super set of optimizable variables - * - * The existing checkInVariables implementation in WFC/SPO/.. are inclusive and it calls checkInVariables of its members - * class A: public SPOSet {} - * class B: public WFC - * { - * A objA; - * checkInVariables() { objA.checkInVariables(); } - * }; - * - * With OptimizableObject, - * class A: public OptimizableObject {} - * class B: public OptimizableObject - * { - * A objA; - * checkInVariablesExclusive() { // should not call objA.checkInVariablesExclusive() if objA has been extracted; } - * }; - * A vector of OptimizableObject, will be created by calling extractOptimizableObjects(). - * All the checkInVariablesExclusive() will be called through this vector and thus - * checkInVariablesExclusive implementation should only handle non-OptimizableObject members. - */ - virtual void checkInVariablesExclusive(opt_variables_type& active) = 0; - - /** reset the parameters during optimizations. 
Exclusive, see checkInVariablesExclusive - */ - virtual void resetParametersExclusive(const opt_variables_type& active) = 0; +using OptVariablesType = OptVariablesTypeT; - /** print the state, e.g., optimizables */ - virtual void reportStatus(std::ostream& os) {} - - void setOptimization(bool state) { is_optimized_ = state; } - - /** Write the variational parameters for this object to the VP HDF file - * - * The hout parameter should come from VariableSet::writeToHDF - * - * Objects can use this function to store additional information to the file. - * - * By default the parameters are saved in VariableSet::writeToHDF, and objects - * do not need to implement this function (yet). - * - */ - virtual void writeVariationalParameters(hdf_archive& hout){}; - - /** Read the variational parameters for this object from the VP HDF file - * - * The hin parameter should come from VariableSet::readFromHDF - * - * By default the parameters are read in VariableSet::readFromHDF, and objects - * do not need to implement this function (yet). - */ - virtual void readVariationalParameters(hdf_archive& hin){}; -}; - -class UniqueOptObjRefs : public RefVector -{ -public: - OptimizableObject& operator[](size_t i) const { return RefVector::operator[](i); } +using OptimizableObject = OptimizableObjectT; - void push_back(OptimizableObject& obj) - { - if (obj.getName().empty()) - throw std::logic_error("BUG!! 
Only named OptimizableObject object can be added to UniqueOptObjRefs!"); - auto result = - std::find_if(begin(), end(), [&](OptimizableObject& element) { return element.getName() == obj.getName(); }); - if (result == end()) - RefVector::push_back(obj); - } -}; +using UniqueOptObjRefs = UniqueOptObjRefsT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h new file mode 100644 index 0000000000..48e827e1ba --- /dev/null +++ b/src/QMCWaveFunctions/OptimizableObjectT.h @@ -0,0 +1,123 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H +#define QMCPLUSPLUS_OPTIMIZABLEOBJECTT_H + +#include "VariableSetT.h" +#include "type_traits/template_types.hpp" + +/**@file OptimizableObjectT.h + *@brief Declaration of OptimizableObjectT + */ +namespace qmcplusplus +{ +template +using OptVariablesTypeT = optimize::VariableSetT; + +template +class OptimizableObjectT +{ +public: + OptimizableObjectT(const std::string& name) : name_(name) {} + + const std::string& getName() const { return name_; } + bool isOptimized() const { return is_optimized_; } + +private: + /** Name of the optimizable object + */ + const std::string name_; + /** If true, this object is actively modified during WFOpt + */ + bool is_optimized_ = false; + +public: + /** check in variational parameters to the global list of parameters used by + * the optimizer. 
+ * @param active a super set of optimizable variables + * + * The existing checkInVariables implementation in WFC/SPO/.. are inclusive + * and it calls checkInVariables of its members class A: public SPOSet {} + * class B: public WFC + * { + * A objA; + * checkInVariables() { objA.checkInVariables(); } + * }; + * + * With OptimizableObject, + * class A: public OptimizableObject {} + * class B: public OptimizableObject + * { + * A objA; + * checkInVariablesExclusive() { // should not call + * objA.checkInVariablesExclusive() if objA has been extracted; } + * }; + * A vector of OptimizableObject, will be created by calling + * extractOptimizableObjects(). All the checkInVariablesExclusive() will be + * called through this vector and thus checkInVariablesExclusive + * implementation should only handle non-OptimizableObject members. + */ + virtual void checkInVariablesExclusive(OptVariablesTypeT& active) = 0; + + /** reset the parameters during optimizations. Exclusive, see + * checkInVariablesExclusive + */ + virtual void resetParametersExclusive(const OptVariablesTypeT& active) = 0; + + /** print the state, e.g., optimizables */ + virtual void reportStatus(std::ostream& os) {} + + void setOptimization(bool state) { is_optimized_ = state; } + + /** Write the variational parameters for this object to the VP HDF file + * + * The hout parameter should come from VariableSet::writeToHDF + * + * Objects can use this function to store additional information to the + * file. + * + * By default the parameters are saved in VariableSet::writeToHDF, and + * objects do not need to implement this function (yet). + * + */ + virtual void writeVariationalParameters(hdf_archive& hout){}; + + /** Read the variational parameters for this object from the VP HDF file + * + * The hin parameter should come from VariableSet::readFromHDF + * + * By default the parameters are read in VariableSet::readFromHDF, and + * objects do not need to implement this function (yet). 
+ */ + virtual void readVariationalParameters(hdf_archive& hin){}; +}; + +template +class UniqueOptObjRefsT : public RefVector> +{ +public: + OptimizableObjectT& operator[](size_t i) const { return RefVector>::operator[](i); } + + void push_back(OptimizableObjectT& obj) + { + if (obj.getName().empty()) + throw std::logic_error("BUG!! Only named OptimizableObject object " + "can be added to UniqueOptObjRefs!"); + auto result = std::find_if(this->begin(), this->end(), + [&](OptimizableObjectT& element) { return element.getName() == obj.getName(); }); + if (result == this->end()) + RefVector>::push_back(obj); + } +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/OrbitalSetTraits.h b/src/QMCWaveFunctions/OrbitalSetTraits.h index 7b35937067..39e56bdf62 100644 --- a/src/QMCWaveFunctions/OrbitalSetTraits.h +++ b/src/QMCWaveFunctions/OrbitalSetTraits.h @@ -54,6 +54,8 @@ struct OrbitalSetTraits //: public OrbitalTraits DIM = OHMMS_DIM }; using RealType = RealAlias; + using FullValueType = FullPrec; + using ComplexType = std::complex; using ValueType = T; using IndexType = int; using PosType = TinyVector; diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp new file mode 100644 index 0000000000..6641e45735 --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp @@ -0,0 +1,197 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file PWBasisT.cpp + * @brief Definition of member functions of Plane-wave basis set + */ +#include "PWBasisT.h" + +namespace qmcplusplus +{ +template +int PWBasisT::readbasis(hdf_archive& h5basisgroup, + RealType ecutoff, + const ParticleLayout& lat, + const std::string& pwname, + const std::string& pwmultname, + bool resizeContainer) +{ + ///make a local copy + Lattice = lat; + ecut = ecutoff; + app_log() << " PWBasisT::" << pwmultname << " is found " << std::endl; + h5basisgroup.read(gvecs, "/electrons/kpoint_0/gvectors"); + NumPlaneWaves = std::max(gvecs.size(), kplusgvecs_cart.size()); + if (NumPlaneWaves == 0) + { + app_error() << " PWBasisT::readbasis Basis is missing. Abort " << std::endl; + abort(); //FIX_ABORT + } + if (kplusgvecs_cart.empty()) + { + kplusgvecs_cart.resize(NumPlaneWaves); + for (int i = 0; i < NumPlaneWaves; i++) + kplusgvecs_cart[i] = Lattice.k_cart(gvecs[i]); + } + //app_log() << " Gx Gy Gz " << std::endl; + //for(int i=0; i(std::cout,"\n")); + return NumPlaneWaves; +} + +template +void PWBasisT::setTwistAngle(const PosType& tang) +{ + PosType dang = twist - tang; + bool sameTwist = dot(dang, dang) < std::numeric_limits::epsilon(); + if (maxmaxg && sameTwist) + return; + twist = tang; + reset(); +} + +template +void PWBasisT::reset() +{ + trimforecut(); + //logC.resize(3,2*maxmaxg+1); + Z.resize(NumPlaneWaves, 2 + DIM); + Zv.resize(NumPlaneWaves); + phi.resize(NumPlaneWaves); +} + +/** Remove basis elements if kinetic energy > ecut. + * + * Keep an index map so we know how to match coefficients on read. Currently every g-vector is kept (no cutoff is applied). 
+ */ +template +void PWBasisT::trimforecut() +{ + //Convert the twist angle to Cartesian coordinates. + twist_cart = Lattice.k_cart(twist); + inputmap.resize(NumPlaneWaves); + app_log() << " PWBasisT::TwistAngle (unit) =" << twist << std::endl; + app_log() << " PWBasisT::TwistAngle (cart) =" << twist_cart << std::endl; + app_log() << " PWBasisT::trimforecut NumPlaneWaves (before) =" << NumPlaneWaves << std::endl; + std::vector gvecCopy(gvecs); + std::vector gcartCopy(kplusgvecs_cart); + gvecs.clear(); + kplusgvecs_cart.clear(); + minusModKplusG2.reserve(NumPlaneWaves); + // RealType kcutoff2 = 2.0*ecut; //std::sqrt(2.0*ecut); + int ngIn = NumPlaneWaves; + for (int ig = 0, newig = 0; ig < ngIn; ig++) + { + //PosType tempvec = Lattice.k_cart(gvecCopy[ig]+twist); + PosType tempvec = gcartCopy[ig] + twist_cart; + RealType mod2 = dot(tempvec, tempvec); + + // Keep all the g-vectors + // The cutoff energy is not stored in the HDF file now. + // Is truncating the gvectors to a spherical shell necessary? + if (true) + { + gvecs.push_back(gvecCopy[ig]); + kplusgvecs_cart.push_back(tempvec); + minusModKplusG2.push_back(-mod2); + //Remember which position in the HDF5 file this came from...for coefficients + inputmap[ig] = newig++; + } +#if 0 + if(mod2<=kcutoff2) + { + gvecs.push_back(gvecCopy[ig]); + kplusgvecs_cart.push_back(tempvec); + minusModKplusG2.push_back(-mod2); + //Remember which position in the HDF5 file this came from...for coefficients + inputmap[ig] = newig++; + } + else + { + inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves. + NumPlaneWaves--; + } +#endif + } +#if defined(PWBASIS_USE_RECURSIVE) + //Store the maximum number of translations, within ecut, of any reciprocal cell vector. This guard must use the same macro as PWBasisT.h (PWBASIS_USE_RECURSIVE). 
+ for (int ig = 0; ig < NumPlaneWaves; ig++) + for (int i = 0; i < OHMMS_DIM; i++) + if (std::abs(gvecs[ig][i]) > maxg[i]) + maxg[i] = std::abs(gvecs[ig][i]); + gvecs_shifted.resize(NumPlaneWaves); + for (int ig = 0; ig < NumPlaneWaves; ig++) + gvecs_shifted[ig] = gvecs[ig] + maxg; + maxmaxg = std::max(maxg[0], std::max(maxg[1], maxg[2])); + //changes the order???? ok + C.resize(3, 2 * maxmaxg + 2); +#else + maxmaxg = 1; +#endif + // //make a copy of input to gvecCopy + //// for(int ig=0, newig=0; ig 0) + //// negative.push_back(1); + //// else { //gx == 0, test gy + //// if(gvecCopy[ig][1] < 0) + //// negative.push_back(0); + //// else if(gvecCopy[ig][1] > 0) + //// negative.push_back(1); + //// else { //gx == gy == 0; test gz. If gz==0 also, take negative=1 (arbitrary) + //// if(gvecCopy[ig][2] < 0) + //// negative.push_back(0); + //// else + //// negative.push_back(1); + //// } + //// } + ////#endif + // } else { + // inputmap[ig] = -1; //Temporary value...need to know final NumPlaneWaves. + // NumPlaneWaves--; + // } + // } + //Finalize the basis. Fix temporary values of inputmap. + //for(int ig=0; igecut + app_log() << " NumPlaneWaves (after) =" << NumPlaneWaves << std::endl; +} +// template class PWBasisT; +// template class PWBasisT; +template class PWBasisT>; +template class PWBasisT>; +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h new file mode 100644 index 0000000000..9cdc92e73c --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h @@ -0,0 +1,349 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. 
+// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file PWBasisT.h + * @brief Declaration of Plane-wave basis set + */ +#ifndef QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H +#define QMCPLUSPLUS_PLANEWAVEBASIST_BLAS_H + +#include "CPU/e2iphi.h" +#include "Configuration.h" +#include "Message/Communicate.h" +#include "Particle/ParticleSet.h" +#include "hdf/hdf_archive.h" +#include "type_traits/complex_help.hpp" + +/** If defined, use recursive method to build the basis set for each position + * + * performance improvement is questionable: load vs sin/cos + */ +// #define PWBASIS_USE_RECURSIVE + +namespace qmcplusplus +{ +/** Plane-wave basis set + * + * Rewrite of PlaneWaveBasis to utilize BLAS level 2 or 3 + * Support more general input tags + */ +template +class PWBasisT : public QMCTraits +{ +public: + using RealType = typename RealAlias_impl::value_type; + using ComplexType = T; + using PosType = TinyVector; + using IndexType = QMCTraits::IndexType; + using ParticleLayout = typename ParticleSetT::ParticleLayout; + using GIndex_t = TinyVector; + +private: + /// max of maxg[i] + int maxmaxg; + // Need to store the maximum translation in each dimension to use recursive + // PW generation. + GIndex_t maxg; + // The PlaneWave data - keep all of these strictly private to prevent + // inconsistencies. + RealType ecut; + /// twist angle in reduced + PosType twist; + /// twist angle in cartesian + PosType twist_cart; // Twist angle in reduced and Cartesian. 
+ + /// gvecs in reduced coordinates + std::vector gvecs; + /// Reduced coordinates with offset + /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim] + std::vector gvecs_shifted; + + std::vector minusModKplusG2; + std::vector kplusgvecs_cart; // Cartesian. + + Matrix C; + // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not + // exp(iGr). We need a way of switching between them for G -> -G, otherwise + // the determinant will have multiple rows that are equal (to within a + // constant factor) of others, giving a zero determinant. For this, we build + // a vector (negative) which stores whether a vector is "+" or "-" (with + // some criterion, to be defined). We then switch from cos() to sin() based + // on the value of this input. + std::vector negative; + +public: + // enumeration for the value, laplacian, gradients and size + enum + { + PW_VALUE, + PW_LAP, + PW_GRADX, + PW_GRADY, + PW_GRADZ, + PW_MAXINDEX + }; + + Matrix Z; + + Vector Zv; + /* inputmap is used for a memory efficient way of + * + * importing the basis-set and coefficients when the desired energy cutoff + * may be lower than that represented by all data in the wavefunction input + * file. The steps taken are: + * - Read all basis data. + * - Create map. inputmap[i] = j; j is correct PW index, i is input coef + * index. For basis elements outside cutoff, inputmap[i] = gvecs.size(); + * - Coefficients are in same order as PWs in inputfile => simply file into + * storage matrix using the map as the input. All excess coefficients are + * put into [gvecs.size()] and not used. i.e. coefs need to be allocated + * 1 higher. Such an approach is not needed for Gamma-point only + * calculations because the basis is spherically ordered. However, when a + * twist-angle is used, the "sphere" of allowed planewaves is shifted. 
+ */ + + Vector phi; + + std::vector inputmap; + + /// total number of basis functions + int NumPlaneWaves; + + /// local copy of Lattice + ParticleLayout Lattice; + + /// default constructor + PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {} + + /// constructor + PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {} + + ~PWBasisT() {} + + /// set the twist angle + void setTwistAngle(const PosType& tang); + + /// reset + void reset(); + + /** Read basis set from hdf5 file. Apply ecut. + * @param h5basisgroup h5 node where basis is located + * @param ecutoff cutoff energy + * @param lat CrystalLattice + * @param resizeContainer if true, resize internal storage. + * @return the number of plane waves + */ + int readbasis(hdf_archive& h5basisgroup, + RealType ecutoff, + const ParticleLayout& lat, + const std::string& pwname = "planewaves", + const std::string& pwmultname = "multipliers", + bool resizeContainer = true); + + /** Remove basis elements if kinetic energy > ecut. + * + * Keep an index map so we know how to match coefficients on read. + */ + void trimforecut(); + +#if defined(PWBASIS_USE_RECURSIVE) + /** Fill the recursion coefficients matrix. 
+ * + * @todo Generalize to non-orthorohmbic cells + */ + inline void BuildRecursionCoefs(const PosType& pos) + { + PosType tau_red(Lattice.toUnit(pos)); +// RealType phi=TWOPI*tau_red[0]; +// RealType nphi=maxg0*phi; +// ComplexType ct0(std::cos(phi),std::sin(phi)); +// ComplexType t(std::cos(nphi),-std::sin(nphi)); +// C0[0]=t; +// for(int n=1; n<=2*maxg0; n++) C0[n] = (t *= ct0); +// +// phi=TWOPI*tau_red[1]; +// nphi=maxg1*phi; +// ct0=ComplexType(std::cos(phi),std::sin(phi)); +// t=ComplexType(std::cos(nphi),-std::sin(nphi)); +// C1[0]=t; +// for(int n=1; n<=2*maxg1; n++) C1[n] = (t *= ct0); +// +// phi=TWOPI*tau_red[2]; +// nphi=maxg2*phi; +// ct0=ComplexType(std::cos(phi),std::sin(phi)); +// t=ComplexType(std::cos(nphi),-std::sin(nphi)); +// C2[0]=t; +// for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0); +#pragma ivdep + for (int idim = 0; idim < 3; idim++) + { + int ng = maxg[idim]; + RealType phi = TWOPI * tau_red[idim]; + RealType nphi = ng * phi; + ComplexType Ctemp(std::cos(phi), std::sin(phi)); + ComplexType t(std::cos(nphi), -std::sin(nphi)); + ComplexType* restrict cp_ptr = C[idim]; + *cp_ptr++ = t; + for (int n = 1; n <= 2 * ng; n++) + { + *cp_ptr++ = (t *= Ctemp); + } + } + // Base version + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // RealType phi=TWOPI*tau_red[idim]; + // ComplexType Ctemp(std::cos(phi),std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ + // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + // Not valid for general supercell + // // Cartesian of twist for 1,1,1 (reduced coordinates) + // PosType G111(1.0,1.0,1.0); + // G111 = Lattice.k_cart(G111); + // + // //Precompute a small number of complex factors (PWs along + // b1,b2,b3 lines) + // //using a fast recursion algorithm + // #pragma ivdep + // for(int idim=0; idim<3; idim++){ + // 
//start the recursion with the 111 vector. + // RealType phi = pos[idim] * G111[idim]; + // register ComplexType Ctemp(std::cos(phi), std::sin(phi)); + // int ng=maxg[idim]; + // ComplexType* restrict cp_ptr=C[idim]+ng; + // ComplexType* restrict cn_ptr=C[idim]+ng-1; + // *cp_ptr=1.0; + // for(int n=1; n<=ng; n++,cn_ptr--){ + // ComplexType t(Ctemp*(*cp_ptr++)); + // *cp_ptr = t; + // *cn_ptr = conj(t); + // } + // } + } + + inline void evaluate(const PosType& pos) + { + BuildRecursionCoefs(pos); + RealType twistdotr = dot(twist_cart, pos); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves for particle iat. + for (int ig = 0; ig < NumPlaneWaves; ig++) + { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr)); + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + Zv[ig] = pw; + } + } + /** Evaluate all planewaves and derivatives for the iat-th particle + * + * The basis functions are evaluated for particles iat: first <= iat < last + * Evaluate the plane-waves at current particle coordinates using a fast + * recursion algorithm. Order of Y,dY and d2Y is kept correct. + * These can be "dotted" with coefficients later to complete orbital + * evaluations. + */ + inline void evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + BuildRecursionCoefs(r); + RealType twistdotr = dot(twist_cart, r); + ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr)); + // Evaluate the planewaves and derivatives. 
+ ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) + { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType pw(pw0); + // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed + for (int idim = 0; idim < 3; idim++) + pw *= C(idim, gvecs_shifted[ig][idim]); + // pw *= C0[gvecs_shifted[ig][0]]; + // pw *= C1[gvecs_shifted[ig][1]]; + // pw *= C2[gvecs_shifted[ig][2]]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); + } + } +#else + inline void evaluate(const PosType& pos) + { + // Evaluate the planewaves for particle iat. + for (int ig = 0; ig < NumPlaneWaves; ig++) + phi[ig] = dot(kplusgvecs_cart[ig], pos); + eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data()); + } + inline void evaluateAll(const ParticleSetT& P, int iat) + { + const PosType& r(P.activeR(iat)); + evaluate(r); + ComplexType* restrict zptr = Z.data(); + for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) + { + // PW is initialized as exp(i*twist.r) so that the final basis + // evaluations are for (twist+G).r + ComplexType& pw = Zv[ig]; + zptr[0] = pw; + zptr[1] = minusModKplusG2[ig] * pw; + zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real()); + zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real()); + zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real()); + } + } +#endif + // /** Fill the recursion coefficients matrix. 
+ // * + // * @todo Generalize to non-orthorohmbic cells + // */ + // void BuildRecursionCoefsByAdd(const PosType& pos) + // { + // // Cartesian of twist for 1,1,1 (reduced coordinates) + // PosType G111(1.0,1.0,1.0); + // G111 = Lattice.k_cart(G111); + // //PosType redP=P.Lattice.toUnit(P.R[iat]); + // //Precompute a small number of complex factors (PWs along b1,b2,b3 + // lines) for(int idim=0; idim<3; idim++){ + // //start the recursion with the 111 vector. + // RealType phi = pos[idim] * G111[idim]; + // int ng(maxg[idim]); + // RealType* restrict cp_ptr=logC[idim]+ng; + // RealType* restrict cn_ptr=logC[idim]+ng-1; + // *cp_ptr=0.0; + // //add INTEL vectorization + // for(int n=1; n<=ng; n++,cn_ptr--){ + // RealType t(phi+*cp_ptr++); + // *cp_ptr = t; + // *cn_ptr = -t; + // } + // } + // } +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp new file mode 100644 index 0000000000..899042244d --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp @@ -0,0 +1,150 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "PWOrbitalSetT.h" + +#include "Message/Communicate.h" +#include "Numerics/MatrixOperators.h" + +namespace qmcplusplus +{ +template +PWOrbitalSetT::~PWOrbitalSetT() +{ + if (OwnBasisSet && myBasisSet) + delete myBasisSet; + if (!IsCloned && this->C != nullptr) + delete this->C; +} + +template +std::unique_ptr> PWOrbitalSetT::makeClone() const +{ + auto myclone = std::make_unique>(*this); + myclone->myBasisSet = new PWBasisT(*myBasisSet); + myclone->IsCloned = true; + return myclone; +} + +template +void PWOrbitalSetT::setOrbitalSetSize(int norbs) +{} + +template +void PWOrbitalSetT::resize(PWBasisPtr bset, int nbands, bool cleanup) +{ + myBasisSet = bset; + this->OrbitalSetSize = nbands; + OwnBasisSet = cleanup; + BasisSetSize = myBasisSet->NumPlaneWaves; + this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize); + this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); + app_log() << " PWOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize + << " BasisSetSize = " << BasisSetSize << std::endl; +} + +template +void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) +{ + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } +} + +template +void PWOrbitalSetT::addVector(const std::vector& coefs, int jorb) +{ + int ng = myBasisSet->inputmap.size(); + if (ng 
!= coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + // drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + (*(this->C))[jorb][inputmap[ig]] = coefs[ig]; + } +} + +template +void PWOrbitalSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +{ + // Evaluate every orbital for particle iat. + // Evaluate the basis-set at these coordinates: + // myBasisSet->evaluate(P,iat); + myBasisSet->evaluate(P.activeR(iat)); + MatrixOperators::product(*(this->C), myBasisSet->Zv, psi); +} + +template +void PWOrbitalSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + // Evaluate the orbitals and derivatives for particle iat only. + myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); + const T* restrict tptr = this->Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + psi[j] = tptr[PW_VALUE]; + d2psi[j] = tptr[PW_LAP]; + dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + } +} + +template +void PWOrbitalSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + for (int iat = first, i = 0; iat < last; iat++, i++) + { + myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(*(this->C), myBasisSet->Z, this->Temp); + const T* restrict tptr = this->Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + logdet(i, j) = tptr[PW_VALUE]; + d2logdet(i, j) = tptr[PW_LAP]; + dlogdet(i, j) = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]); + } + } +} + +// Class concrete types from T +// NOTE: This class only gets compiled if QMC_COMPLEX is defined, thus it is +// inherently 
complex template class PWOrbitalSetT; template class +// PWOrbitalSetT; +template class PWOrbitalSetT>; +template class PWOrbitalSetT>; +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h new file mode 100644 index 0000000000..848a10b3ea --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h @@ -0,0 +1,125 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +/** @file PWOrbitalSetT.h + * @brief Definition of member functions of Plane-wave basis set + */ +#ifndef QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H +#define QMCPLUSPLUS_PLANEWAVE_ORBITALSETT_BLAS_H + +#include "CPU/BLAS.hpp" +#include "QMCWaveFunctions/PlaneWave/PWBasisT.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "type_traits/complex_help.hpp" + +namespace qmcplusplus +{ + +template +class PWOrbitalSetT : public SPOSetT +{ +public: + using RealType = typename SPOSetT::RealType; + using ComplexType = T; + using PosType = typename SPOSetT::PosType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using 
IndexType = typename SPOSetT::IndexType; + + using BasisSet_t = PWBasisT; + using PWBasisPtr = PWBasisT*; + + /** inherit the enum of BasisSet_t */ + enum + { + PW_VALUE = BasisSet_t::PW_VALUE, + PW_LAP = BasisSet_t::PW_LAP, + PW_GRADX = BasisSet_t::PW_GRADX, + PW_GRADY = BasisSet_t::PW_GRADY, + PW_GRADZ = BasisSet_t::PW_GRADZ, + PW_MAXINDEX = BasisSet_t::PW_MAXINDEX + }; + + /** default constructor + */ + PWOrbitalSetT(const std::string& my_name) + : SPOSetT(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false) + {} + + std::string getClassName() const override { return "PWOrbitalSetT"; } + + /** delete BasisSet only it owns this + * + * Builder takes care of who owns what + */ + ~PWOrbitalSetT() override; + + std::unique_ptr> makeClone() const override; + /** resize the orbital base + * @param bset PWBasis + * @param nbands number of bands + * @param cleaup if true, owns PWBasis. Will clean up. + */ + void resize(PWBasisPtr bset, int nbands, bool cleanup = false); + + /** Builder class takes care of the assertion + */ + void addVector(const std::vector& coefs, int jorb); + void addVector(const std::vector& coefs, int jorb); + + void setOrbitalSetSize(int norbs) override; + + inline T evaluate(int ib, const PosType& pos) + { + myBasisSet->evaluate(pos); + return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data()); + } + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + /** boolean + * + * If true, this has to delete the BasisSet + */ + bool OwnBasisSet; + /// TwistAngle of this PWOrbitalSetT + PosType TwistAngle; + /// My basis set + PWBasisPtr myBasisSet; + /// number of basis + IndexType BasisSetSize; + /** 
pointer to matrix containing the coefficients + * + * makeClone makes a shallow copy and flag IsCloned + */ + ValueMatrix* C; + /// if true, do not clean up + bool IsCloned; + + /** temporary array to perform gemm operation */ + Matrix Temp; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp new file mode 100644 index 0000000000..3286624090 --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.cpp @@ -0,0 +1,165 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file PWRealOrbitalSetT.cpp + * @brief declaration of the member functions of PWRealOrbitalSetT + * + * Not the most optimized method to use wavefunctions in a plane-wave basis. 
+ */ +#include "Message/Communicate.h" +#include "PWRealOrbitalSetT.h" +#include "Numerics/MatrixOperators.h" +#include "type_traits/ConvertToReal.h" + +namespace qmcplusplus +{ +template +PWRealOrbitalSetT::~PWRealOrbitalSetT() +{ + if (OwnBasisSet && myBasisSet) + delete myBasisSet; +} + +template +std::unique_ptr> PWRealOrbitalSetT::makeClone() const +{ + auto myclone = std::make_unique>(*this); + myclone->myBasisSet = new PWBasis(*(this->myBasisSet)); + return myclone; +} + +template +void PWRealOrbitalSetT::setOrbitalSetSize(int norbs) +{} + +template +void PWRealOrbitalSetT::resize(PWBasisPtr bset, int nbands, bool cleanup) +{ + myBasisSet = bset; + this->OrbitalSetSize = nbands; + OwnBasisSet = cleanup; + BasisSetSize = myBasisSet->NumPlaneWaves; + CC.resize(this->OrbitalSetSize, BasisSetSize); + Temp.resize(this->OrbitalSetSize, PW_MAXINDEX); + tempPsi.resize(this->OrbitalSetSize); + app_log() << " PWRealOrbitalSetT::resize OrbitalSetSize =" << this->OrbitalSetSize + << " BasisSetSize = " << BasisSetSize << std::endl; +} + +template +void PWRealOrbitalSetT::addVector(const std::vector& coefs, int jorb) +{ + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + //drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + CC[jorb][inputmap[ig]] = coefs[ig]; + } +} + +template +void PWRealOrbitalSetT::addVector(const std::vector& coefs, int jorb) +{ + int ng = myBasisSet->inputmap.size(); + if (ng != coefs.size()) + { + app_error() << " Input G map does not match the basis size of wave functions " << std::endl; + OHMMS::Controller->abort(); + } + //drop G points for the given TwistAngle + const std::vector& inputmap(myBasisSet->inputmap); + for (int ig = 0; ig < ng; ig++) + { + if (inputmap[ig] > -1) + 
CC[jorb][inputmap[ig]] = coefs[ig]; + } +} + +template +void PWRealOrbitalSetT::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +{ + myBasisSet->evaluate(P.activeR(iat)); + MatrixOperators::product(CC, myBasisSet->Zv, tempPsi); + for (int j = 0; j < this->OrbitalSetSize; j++) + psi[j] = tempPsi[j].real(); +} + +template +void PWRealOrbitalSetT::evaluateVGL(const ParticleSet& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(CC, myBasisSet->Z, Temp); + const ComplexType* restrict tptr = Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + psi[j] = tptr[PW_VALUE].real(); + d2psi[j] = tptr[PW_LAP].real(); +#if OHMMS_DIM == 3 + dpsi[j] = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real()); +#elif OHMMS_DIM == 2 + dpsi[j] = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real()); +#elif OHMMS_DIM == 1 + dpsi[j] = GradType(tptr[PW_GRADX].real()); +#else +#error "Only physical dimensions 1/2/3 are supported." +#endif + } +} + +template +void PWRealOrbitalSetT::evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + for (int iat = first, i = 0; iat < last; iat++, i++) + { + myBasisSet->evaluateAll(P, iat); + MatrixOperators::product(CC, myBasisSet->Z, Temp); + const ComplexType* restrict tptr = Temp.data(); + for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) + { + convertToReal(tptr[PW_VALUE], logdet(i, j)); + convertToReal(tptr[PW_LAP], d2logdet(i, j)); +#if OHMMS_DIM == 3 + dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real(), tptr[PW_GRADZ].real()); +#elif OHMMS_DIM == 2 + dlogdet(i, j) = GradType(tptr[PW_GRADX].real(), tptr[PW_GRADY].real()); +#elif OHMMS_DIM == 1 + dlogdet(i, j) = GradType(tptr[PW_GRADX].real()); +#else +#error "Only physical dimensions 1/2/3 are supported." 
+#endif + } + } +} + +template class SPOSetT; +template class SPOSetT; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h new file mode 100644 index 0000000000..8455b1e561 --- /dev/null +++ b/src/QMCWaveFunctions/PlaneWave/PWRealOrbitalSetT.h @@ -0,0 +1,143 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file PWRealOrbitalSetT.h + * @brief Define PWRealOrbitalSetT derived from SPOSetT + * + * This is a specialized single-particle orbital set for real trial + * wavefunctions and enabled with QMC_COMPLEX=0 + */ +#ifndef QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H +#define QMCPLUSPLUS_PLANEWAVE_REALORBITALSETT_BLAS_H + +#include "QMCWaveFunctions/PlaneWave/PWBasis.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "CPU/BLAS.hpp" + +namespace qmcplusplus +{ +template +class PWRealOrbitalSetT : public SPOSetT +{ +public: + using BasisSet_t = PWBasis; + using PWBasisPtr = PWBasis*; + + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using ValueVector = typename SPOSetT::ValueVector; + 
using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessMatrix = typename SPOSetT::HessMatrix; + using PosType = typename SPOSetT::PosType; + + /** inherit the enum of BasisSet_t */ + enum + { + PW_VALUE = BasisSet_t::PW_VALUE, + PW_LAP = BasisSet_t::PW_LAP, + PW_GRADX = BasisSet_t::PW_GRADX, + PW_GRADY = BasisSet_t::PW_GRADY, + PW_GRADZ = BasisSet_t::PW_GRADZ, + PW_MAXINDEX = BasisSet_t::PW_MAXINDEX + }; + + /** default constructor + */ + PWRealOrbitalSetT(const std::string& my_name) + : SPOSetT(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0) + {} + + std::string getClassName() const override { return "PWRealOrbitalSetT"; } + + /** delete BasisSet only it owns this + * + * Builder takes care of who owns what + */ + ~PWRealOrbitalSetT() override; + + std::unique_ptr> makeClone() const override; + + /** resize the orbital base + * @param bset PWBasis + * @param nbands number of bands + * @param cleaup if true, owns PWBasis. Will clean up. 
+ */ + void resize(PWBasisPtr bset, int nbands, bool cleanup = false); + + /** add eigenstate for jorb-th orbital + * @param coefs real input data + * @param jorb orbital index + */ + void addVector(const std::vector& coefs, int jorb); + + /** add eigenstate for jorb-th orbital + * @param coefs complex input data + * @param jorb orbital index + */ + void addVector(const std::vector& coefs, int jorb); + + void setOrbitalSetSize(int norbs) override; + + inline T evaluate(int ib, const PosType& pos) + { + myBasisSet->evaluate(pos); + return real(BLAS::dot(BasisSetSize, CC[ib], myBasisSet->Zv.data())); + } + + void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + void evaluate_notranspose(const ParticleSet& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + APP_ABORT("Need specialization of evaluate_notranspose() for grad_grad_logdet. 
\n"); + } + + + /** boolean + * + * If true, this has to delete the BasisSet + */ + bool OwnBasisSet; + ///TwistAngle of this PWRealOrbitalSet + PosType TwistAngle; + ///My basis set + PWBasisPtr myBasisSet; + ///number of basis + IndexType BasisSetSize; + ///Plane-wave coefficients of complex: (iband,g-vector) + Matrix CC; + /// temporary array to perform gemm operation + Matrix Temp; + ///temporary complex vector before assigning to a real psi + Vector tempPsi; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h index 6e4c45d525..8c7e17a8f6 100644 --- a/src/QMCWaveFunctions/RotatedSPOs.h +++ b/src/QMCWaveFunctions/RotatedSPOs.h @@ -13,445 +13,11 @@ #ifndef QMCPLUSPLUS_ROTATION_HELPER_H #define QMCPLUSPLUS_ROTATION_HELPER_H -#include "QMCWaveFunctions/SPOSet.h" - +#include "QMCWaveFunctions/RotatedSPOsT.h" namespace qmcplusplus { -class RotatedSPOs; -namespace testing -{ -opt_variables_type& getMyVarsFull(RotatedSPOs& rot); -std::vector>& getHistoryParams(RotatedSPOs& rot); -} // namespace testing - -class RotatedSPOs : public SPOSet, public OptimizableObject -{ -public: - //constructor - RotatedSPOs(const std::string& my_name, std::unique_ptr&& spos); - //destructor - ~RotatedSPOs() override; - - std::string getClassName() const override { return "RotatedSPOs"; } - bool isOptimizable() const override { return true; } - bool isOMPoffload() const override { return Phi->isOMPoffload(); } - bool hasIonDerivs() const override { return Phi->hasIonDerivs(); } - - // Vector of rotation matrix indices - using RotationIndices = std::vector>; - - // Active orbital rotation parameter indices - RotationIndices m_act_rot_inds; - - // Full set of rotation values for global rotation - RotationIndices m_full_rot_inds; - - // Construct a list of the matrix indices for non-zero rotation parameters. - // (The structure for a sparse representation of the matrix) - // Only core->active rotations are created. 
- static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); - - // Construct a list for all the matrix indices, including core->active, core->core and active->active - static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); - - // Fill in antisymmetric matrix from the list of rotation parameter indices - // and a list of parameter values. - // This function assumes rot_mat is properly sized upon input and is set to zero. - static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const std::vector& param, - ValueMatrix& rot_mat); - - // Extract the list of rotation parameters from the entries in an antisymmetric matrix - // This function expects rot_indices and param are the same length. - static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param); - - //function to perform orbital rotations - void apply_rotation(const std::vector& param, bool use_stored_copy); - - // For global rotation, inputs are the old parameters and the delta parameters. - // The corresponding rotation matrices are constructed, multiplied together, - // and the new parameters extracted. - // The new rotation is applied to the underlying SPO coefficients - void applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param); - - // Perform the construction of matrices and extraction of parameters for a delta rotation. - // Split out and made static for testing. 
- static void constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat); - - // When initializing the rotation from VP files - // This function applies the rotation history - void applyRotationHistory(); - - // This function applies the global rotation (similar to apply_rotation, but for the full - // set of rotation parameters) - void applyFullRotation(const std::vector& full_param, bool use_stored_copy); - - // Compute matrix exponential of an antisymmetric matrix (result is rotation matrix) - static void exponentiate_antisym_matrix(ValueMatrix& mat); - - // Compute matrix log of rotation matrix to produce antisymmetric matrix - static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); - - //A particular SPOSet used for Orbitals - std::unique_ptr Phi; - - /// Set the rotation parameters (usually from input file) - void setRotationParameters(const std::vector& param_list); - - /// the number of electrons of the majority spin - size_t nel_major_; - - std::unique_ptr makeClone() const override; - - // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the Determinant part of the wfn - // myG_J is the Gradient of the all other parts of the wavefunction (typically just the Jastrow). - // It represents \frac{\nabla\psi_{J}}{\psi_{J}} - // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The Laplacian portion - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; - - ValueMatrix Bbar; - ValueMatrix psiM_inv; - ValueMatrix psiM_all; - GradMatrix dpsiM_all; - ValueMatrix d2psiM_all; - - - // Single Slater creation - void buildOptVariables(size_t nel); - - // For the MSD case rotations must be created in MultiSlaterDetTableMethod class - void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); - - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) override; - - void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) override; - - void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - 
const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) override; - - //helper function to evaluatederivative; evaluate orbital rotation parameter derivative using table method - void table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - void table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs) override { opt_obj_refs.push_back(*this); } - - void checkInVariablesExclusive(opt_variables_type& active) override - { - if (myVars.size()) - active.insertFrom(myVars); - } - - void checkOutVariables(const opt_variables_type& active) override { myVars.getIndex(active); } - - ///reset - void 
resetParametersExclusive(const opt_variables_type& active) override; - - void writeVariationalParameters(hdf_archive& hout) override; - - void readVariationalParameters(hdf_archive& hin) override; - - //********************************************************************************* - //the following functions simply call Phi's corresponding functions - void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); } - - void checkObject() const override { Phi->checkObject(); } - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateValue(P, iat, psi); - } - - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); - } - - void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) override - { - Phi->evaluateDetRatios(VP, psi, psiinv, ratios); - } - - void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) override; - - void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi) override - { - assert(psi.size() <= OrbitalSetSize); - Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); - } - - - void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) override - { - Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); - } - - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - { - Phi->evaluate_notranspose(P, first, last, 
logdet, dlogdet, d2logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet); - } - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) override - { - Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); - } - - void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) override - { - Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi); - } - - // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) - // {Phi->evaluateThridDeriv(P, first, last, grad_grad_grad_logdet); } - - /// Use history list (false) or global rotation (true) - void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; } - - void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const override; - - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const override; - - void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - 
const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const override; - - void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const override; - - void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const override; - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void createResource(ResourceCollection& collection) const override; - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - -private: - /// true if SPO parameters (orbital rotation parameters) have been supplied by input - bool params_supplied; - /// list of supplied orbital rotation parameters - std::vector params; - - /// Full set of rotation matrix parameters for use in global rotation method - opt_variables_type myVarsFull; - - /// timer for apply_rotation - NewTimer& apply_rotation_timer_; - - /// List of previously applied parameters - std::vector> history_params_; - - static RefVectorWithLeader extractPhiRefList(const RefVectorWithLeader& spo_list); - - /// Use global rotation or 
history list - bool use_global_rot_ = true; - - friend opt_variables_type& testing::getMyVarsFull(RotatedSPOs& rot); - friend std::vector>& testing::getHistoryParams(RotatedSPOs& rot); -}; - +using RotatedSPOs = RotatedSPOsT; } //namespace qmcplusplus diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp similarity index 54% rename from src/QMCWaveFunctions/RotatedSPOs.cpp rename to src/QMCWaveFunctions/RotatedSPOsT.cpp index 0815484c5e..a0b557591e 100644 --- a/src/QMCWaveFunctions/RotatedSPOs.cpp +++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp @@ -1,52 +1,52 @@ ////////////////////////////////////////////////////////////////////////////////////// -//// This file is distributed under the University of Illinois/NCSA Open Source License. -//// See LICENSE file in top directory for details. -//// -//// Copyright (c) QMCPACK developers. -//// -//// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//// Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley -//// Ye Luo, yeluo@anl.gov, Argonne National Laboratory -//// -//// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley -//////////////////////////////////////////////////////////////////////////////////////// -#include "RotatedSPOs.h" -#include "Numerics/MatrixOperators.h" -#include "Numerics/DeterminantOperators.h" +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers +// +// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley +// Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File created by: Sergio D. 
Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley +////////////////////////////////////////////////////////////////////////////////////// + +#include "RotatedSPOsT.h" + #include "CPU/BLAS.hpp" +#include "Numerics/DeterminantOperators.h" +#include "Numerics/MatrixOperators.h" #include "io/hdf/hdf_archive.h" - namespace qmcplusplus { -RotatedSPOs::RotatedSPOs(const std::string& my_name, std::unique_ptr&& spos) - : SPOSet(my_name), - OptimizableObject(my_name), - Phi(std::move(spos)), - nel_major_(0), - params_supplied(false), - apply_rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine)) +template +RotatedSPOsT::RotatedSPOsT(const std::string& my_name, std::unique_ptr>&& spos) + : SPOSetT(my_name), OptimizableObjectT(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false) { - OrbitalSetSize = Phi->getOrbitalSetSize(); + this->OrbitalSetSize = Phi->getOrbitalSetSize(); } -RotatedSPOs::~RotatedSPOs() {} +template +RotatedSPOsT::~RotatedSPOsT() +{} - -void RotatedSPOs::setRotationParameters(const std::vector& param_list) +template +void RotatedSPOsT::setRotationParameters(const std::vector& param_list) { params = param_list; params_supplied = true; } -void RotatedSPOs::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) +template +void RotatedSPOsT::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices) { for (int i = 0; i < nel; i++) for (int j = nel; j < nmo; j++) rot_indices.emplace_back(i, j); } -void RotatedSPOs::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) +template +void RotatedSPOsT::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices) { rot_indices.reserve(nmo * (nmo - 1) / 2); @@ -67,9 +67,10 @@ void RotatedSPOs::createRotationIndicesFull(int nel, int nmo, RotationIndices& r rot_indices.emplace_back(i, j); } -void RotatedSPOs::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, - const 
std::vector& param, - ValueMatrix& rot_mat) +template +void RotatedSPOsT::constructAntiSymmetricMatrix(const RotationIndices& rot_indices, + const std::vector& param, + ValueMatrix& rot_mat) { assert(rot_indices.size() == param.size()); // Assumes rot_mat is of the correct size @@ -87,9 +88,10 @@ void RotatedSPOs::constructAntiSymmetricMatrix(const RotationIndices& rot_indice } } -void RotatedSPOs::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, - const ValueMatrix& rot_mat, - std::vector& param) +template +void RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, + const ValueMatrix& rot_mat, + std::vector& param) { assert(rot_indices.size() == param.size()); // Assumes rot_mat is of the correct size @@ -102,7 +104,8 @@ void RotatedSPOs::extractParamsFromAntiSymmetricMatrix(const RotationIndices& ro } } -void RotatedSPOs::resetParametersExclusive(const opt_variables_type& active) +template +void RotatedSPOsT::resetParametersExclusive(const OptVariablesTypeT& active) { std::vector delta_param(m_act_rot_inds.size()); @@ -119,9 +122,9 @@ void RotatedSPOs::resetParametersExclusive(const opt_variables_type& active) for (int i = 0; i < m_act_rot_inds.size(); i++) { - int loc = myVars.where(i); - delta_param[i] = active[loc] - myVars[i]; - myVars[i] = active[loc]; + int loc = this->myVars.where(i); + delta_param[i] = active[loc] - this->myVars[i]; + this->myVars[i] = active[loc]; } if (use_global_rot_) @@ -144,13 +147,14 @@ void RotatedSPOs::resetParametersExclusive(const opt_variables_type& active) } } -void RotatedSPOs::writeVariationalParameters(hdf_archive& hout) +template +void RotatedSPOsT::writeVariationalParameters(hdf_archive& hout) { - hout.push("RotatedSPOs"); + hout.push("RotatedSPOsT"); if (use_global_rot_) { hout.push("rotation_global"); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); + std::string rot_global_name = std::string("rotation_global_") + 
SPOSetT::getName(); int nparam_full = myVarsFull.size(); std::vector full_params(nparam_full); @@ -173,20 +177,21 @@ void RotatedSPOs::writeVariationalParameters(hdf_archive& hout) for (size_t j = 0; j < cols; j++) tmp(i, j) = history_params_[i][j]; - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); + std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); hout.write(tmp, rot_hist_name); hout.pop(); } // Save myVars in order to restore object state exactly - // The values aren't meaningful, but they need to match those saved in VariableSet + // The values aren't meaningful, but they need to match those saved in + // VariableSet hout.push("rotation_params"); - std::string rot_params_name = std::string("rotation_params_") + SPOSet::getName(); + std::string rot_params_name = std::string("rotation_params_") + SPOSetT::getName(); - int nparam = myVars.size(); + int nparam = this->myVars.size(); std::vector params(nparam); for (int i = 0; i < nparam; i++) - params[i] = myVars[i]; + params[i] = this->myVars[i]; hout.write(params, rot_params_name); hout.pop(); @@ -194,20 +199,20 @@ void RotatedSPOs::writeVariationalParameters(hdf_archive& hout) hout.pop(); } -void RotatedSPOs::readVariationalParameters(hdf_archive& hin) +template +void RotatedSPOsT::readVariationalParameters(hdf_archive& hin) { - hin.push("RotatedSPOs", false); + hin.push("RotatedSPOsT", false); bool grp_hist_exists = hin.is_group("rotation_history"); bool grp_global_exists = hin.is_group("rotation_global"); if (!grp_hist_exists && !grp_global_exists) app_warning() << "Rotation parameters not found in VP file"; - if (grp_global_exists) { hin.push("rotation_global", false); - std::string rot_global_name = std::string("rotation_global_") + SPOSet::getName(); + std::string rot_global_name = std::string("rotation_global_") + SPOSetT::getName(); std::vector sizes(1); if (!hin.getShape(rot_global_name, sizes)) @@ -235,7 +240,7 @@ void 
RotatedSPOs::readVariationalParameters(hdf_archive& hin) else if (grp_hist_exists) { hin.push("rotation_history", false); - std::string rot_hist_name = std::string("rotation_history_") + SPOSet::getName(); + std::string rot_hist_name = std::string("rotation_history_") + SPOSetT::getName(); std::vector sizes(2); if (!hin.getShape(rot_hist_name, sizes)) throw std::runtime_error("Failed to read rotation history in VP file"); @@ -258,14 +263,14 @@ void RotatedSPOs::readVariationalParameters(hdf_archive& hin) } hin.push("rotation_params", false); - std::string rot_param_name = std::string("rotation_params_") + SPOSet::getName(); + std::string rot_param_name = std::string("rotation_params_") + SPOSetT::getName(); std::vector sizes(1); if (!hin.getShape(rot_param_name, sizes)) throw std::runtime_error("Failed to read rotation_params in VP file"); int nparam_actual = sizes[0]; - int nparam = myVars.size(); + int nparam = this->myVars.size(); if (nparam != nparam_actual) { std::ostringstream tmp_err; @@ -277,24 +282,25 @@ void RotatedSPOs::readVariationalParameters(hdf_archive& hin) std::vector params(nparam); hin.read(params, rot_param_name); for (int i = 0; i < nparam; i++) - myVars[i] = params[i]; + this->myVars[i] = params[i]; hin.pop(); hin.pop(); } -void RotatedSPOs::buildOptVariables(const size_t nel) +template +void RotatedSPOsT::buildOptVariables(const size_t nel) { -#if !defined(QMC_COMPLEX) - /* Only rebuild optimized variables if more after-rotation orbitals are needed - * Consider ROHF, there is only one set of SPO for both spin up and down Nup > Ndown. - * nel_major_ will be set Nup. 
- * - * Use the size of myVars as a flag to avoid building the rotation parameters again - * when a clone is made (the DiracDeterminant constructor calls buildOptVariables) - */ - if (nel > nel_major_ && myVars.size() == 0) + /* Only rebuild optimized variables if more after-rotation orbitals are + * needed Consider ROHF, there is only one set of SPO for both spin up and + * down Nup > Ndown. nel_major_ will be set Nup. + * + * Use the size of myVars as a flag to avoid building the rotation + * parameters again when a clone is made (the DiracDeterminant constructor + * calls buildOptVariables) + */ + if (nel > nel_major_ && this->myVars.size() == 0) { nel_major_ = nel; @@ -311,12 +317,11 @@ void RotatedSPOs::buildOptVariables(const size_t nel) buildOptVariables(created_m_act_rot_inds, created_full_rot_inds); } -#endif } -void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) +template +void RotatedSPOsT::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations) { -#if !defined(QMC_COMPLEX) const size_t nmo = Phi->getOrbitalSetSize(); // create active rotations @@ -338,27 +343,28 @@ void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const Rota app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl; if (params_supplied) if (nparams_active != params.size()) - throw std::runtime_error( - "The number of supplied orbital rotation parameters does not match number prdouced by the slater " - "expansion. \n"); + throw std::runtime_error("The number of supplied orbital rotation parameters does not " + "match number prdouced by the slater " + "expansion. \n"); - myVars.clear(); + this->myVars.clear(); for (int i = 0; i < nparams_active; i++) { p = m_act_rot_inds[i].first; q = m_act_rot_inds[i].second; std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? 
"0" : "") << p << "_" - << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") + << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; - // If the user input parameters, use those. Otherwise, initialize the parameters to zero + // If the user input parameters, use those. Otherwise, initialize the + // parameters to zero if (params_supplied) { - myVars.insert(sstr.str(), params[i]); + this->myVars.insert(sstr.str(), params[i]); } else { - myVars.insert(sstr.str(), 0.0); + this->myVars.insert(sstr.str(), 0.0); } } @@ -370,8 +376,9 @@ void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const Rota p = m_full_rot_inds[i].first; q = m_full_rot_inds[i].second; std::stringstream sstr; - sstr << my_name_ << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p - << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q; + sstr << SPOSetT::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") + << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? 
"0" : "") + << q; if (params_supplied && i < m_act_rot_inds.size()) myVarsFull.insert(sstr.str(), params[i]); @@ -380,25 +387,24 @@ void RotatedSPOs::buildOptVariables(const RotationIndices& rotations, const Rota } } - - //Printing the parameters + // Printing the parameters if (true) { app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n"; - myVars.print(app_log()); + this->myVars.print(app_log()); } if (params_supplied) { std::vector param(m_act_rot_inds.size()); for (int i = 0; i < m_act_rot_inds.size(); i++) - param[i] = myVars[i]; + param[i] = this->myVars[i]; apply_rotation(param, false); } -#endif } -void RotatedSPOs::apply_rotation(const std::vector& param, bool use_stored_copy) +template +void RotatedSPOsT::apply_rotation(const std::vector& param, bool use_stored_copy) { assert(param.size() == m_act_rot_inds.size()); @@ -408,37 +414,33 @@ void RotatedSPOs::apply_rotation(const std::vector& param, bool use_st constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat); /* - rot_mat is now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ + rot_mat is now an anti-hermitian matrix. Now we convert + it into a unitary matrix via rot_mat = exp(-rot_mat). + Finally, apply unitary matrix to orbs. 
+ */ exponentiate_antisym_matrix(rot_mat); - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(rot_mat, use_stored_copy); - } + Phi->applyRotation(rot_mat, use_stored_copy); } -void RotatedSPOs::applyDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - std::vector& new_param) +template +void RotatedSPOsT::applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + std::vector& new_param) { const size_t nmo = Phi->getOrbitalSetSize(); ValueMatrix new_rot_mat(nmo, nmo); constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat); - { - ScopedTimer local(apply_rotation_timer_); - Phi->applyRotation(new_rot_mat, true); - } + Phi->applyRotation(new_rot_mat, true); } -void RotatedSPOs::constructDeltaRotation(const std::vector& delta_param, - const std::vector& old_param, - const RotationIndices& act_rot_inds, - const RotationIndices& full_rot_inds, - std::vector& new_param, - ValueMatrix& new_rot_mat) +template +void RotatedSPOsT::constructDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, + std::vector& new_param, + ValueMatrix& new_rot_mat) { assert(delta_param.size() == act_rot_inds.size()); assert(old_param.size() == full_rot_inds.size()); @@ -466,26 +468,28 @@ void RotatedSPOs::constructDeltaRotation(const std::vector& delta_para extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param); } -void RotatedSPOs::applyFullRotation(const std::vector& full_param, bool use_stored_copy) +template +void RotatedSPOsT::applyFullRotation(const std::vector& full_param, bool use_stored_copy) { assert(full_param.size() == m_full_rot_inds.size()); const size_t nmo = Phi->getOrbitalSetSize(); ValueMatrix rot_mat(nmo, nmo); - rot_mat = ValueType(0); + rot_mat = T(0); constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat); /* - rot_mat is 
now an anti-hermitian matrix. Now we convert - it into a unitary matrix via rot_mat = exp(-rot_mat). - Finally, apply unitary matrix to orbs. - */ + rot_mat is now an anti-hermitian matrix. Now we convert + it into a unitary matrix via rot_mat = exp(-rot_mat). + Finally, apply unitary matrix to orbs. + */ exponentiate_antisym_matrix(rot_mat); Phi->applyRotation(rot_mat, use_stored_copy); } -void RotatedSPOs::applyRotationHistory() +template +void RotatedSPOsT::applyRotationHistory() { for (auto delta_param : history_params_) { @@ -493,8 +497,10 @@ void RotatedSPOs::applyRotationHistory() } } -// compute exponential of a real, antisymmetric matrix by diagonalizing and exponentiating eigenvalues -void RotatedSPOs::exponentiate_antisym_matrix(ValueMatrix& mat) +// compute exponential of a real, antisymmetric matrix by diagonalizing and +// exponentiating eigenvalues +template +void RotatedSPOsT::exponentiate_antisym_matrix(ValueMatrix& mat) { const int n = mat.rows(); std::vector> mat_h(n * n, 0); @@ -526,7 +532,7 @@ void RotatedSPOs::exponentiate_antisym_matrix(ValueMatrix& mat) if (info != 0) { std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::exponentiate_antisym_matrix"; + msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix"; throw std::runtime_error(msg.str()); } // iterate through diagonal matrix, exponentiate terms @@ -548,14 +554,16 @@ void RotatedSPOs::exponentiate_antisym_matrix(ValueMatrix& mat) { if (mat_d[i + n * j].imag() > 1e-12) { - app_log() << "warning: large imaginary value in orbital rotation matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_d[i + n * j].imag() << std::endl; + app_log() << "warning: large imaginary value in orbital " + "rotation matrix: (i,j) = (" + << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl; } mat[j][i] = mat_d[i + n * j].real(); } } -void RotatedSPOs::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) 
+template +void RotatedSPOsT::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output) { const int n = mat.rows(); std::vector mat_h(n * n, 0); @@ -584,7 +592,7 @@ void RotatedSPOs::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output if (info != 0) { std::ostringstream msg; - msg << "heev failed with info = " << info << " in RotatedSPOs::log_antisym_matrix"; + msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix"; throw std::runtime_error(msg.str()); } @@ -613,27 +621,28 @@ void RotatedSPOs::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n); BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n); - for (int i = 0; i < n; ++i) for (int j = 0; j < n; ++j) { if (mat_cd[i + n * j].imag() > 1e-12) { - app_log() << "warning: large imaginary value in antisymmetric matrix: (i,j) = (" << i << "," << j - << "), im = " << mat_cd[i + n * j].imag() << std::endl; + app_log() << "warning: large imaginary value in antisymmetric " + "matrix: (i,j) = (" + << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl; } output[i][j] = mat_cd[i + n * j].real(); } } -void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) +template +void RotatedSPOsT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) { Phi->evaluateDetRatios(VP, psi, psiinv, ratios); @@ -650,8 +659,8 @@ void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, dpsiM_all = 0; d2psiM_all = 0; - const ParticleSet& P = VP.getRefPS(); - int iel = VP.refPtcl; + const ParticleSetT& 
P = VP.getRefPS(); + int iel = VP.refPtcl; Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); @@ -661,15 +670,15 @@ void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, Invert(psiM_inv.data(), nel, nel); - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T_orig; + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + ValueMatrix T_orig; T_orig.resize(nel, nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T_orig.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo); - SPOSet::ValueMatrix T; - T.resize(nel, nmo); + ValueMatrix T_mat; + T_mat.resize(nel, nmo); ValueVector tmp_psi; tmp_psi.resize(nmo); @@ -687,32 +696,34 @@ void RotatedSPOs::evaluateDerivRatios(const VirtualParticleSet& VP, Invert(psiM_inv.data(), nel, nel); - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); - // The matrix A is rectangular. Ainv is the inverse of the square part of the matrix. - // The multiply of Ainv and the square part of A is just the identity. - // This multiply could be reduced to Ainv and the non-square part of A. - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); + // The matrix A is rectangular. Ainv is the inverse of the square part + // of the matrix. The multiply of Ainv and the square part of A is just + // the identity. This multiply could be reduced to Ainv and the + // non-square part of A. 
+ BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = myVars.where(i); + int kk = this->myVars.where(i); if (kk >= 0) { const int p = m_act_rot_inds.at(i).first; const int q = m_act_rot_inds.at(i).second; - dratios(iat, kk) = T(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) + dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars) } } } } -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -738,31 +749,32 @@ void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, Invert(psiM_inv.data(), nel, nel); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 - const ValueType* const A(psiM_all.data()); - const ValueType* const Ainv(psiM_inv.data()); - SPOSet::ValueMatrix T; - T.resize(nel, nmo); + const T* const A(psiM_all.data()); + const T* const Ainv(psiM_inv.data()); + ValueMatrix T_mat; + T_mat.resize(nel, nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo); for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = myVars.where(i); + int kk = this->myVars.where(i); if (kk >= 0) { const int p = m_act_rot_inds.at(i).first; const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] = T(p, q); + dlogpsi[kk] = T_mat(p, q); } } } -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) +template +void 
RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) { const size_t nel = LastIndex - FirstIndex; const size_t nmo = Phi->getOrbitalSetSize(); @@ -790,7 +802,6 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, dpsiM_all = 0; d2psiM_all = 0; - Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all); for (int i = 0; i < nel; i++) @@ -799,8 +810,8 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, Invert(psiM_inv.data(), nel, nel); - //current value of Gradient and Laplacian - // gradient components + // current value of Gradient and Laplacian + // gradient components for (int a = 0; a < nel; a++) for (int i = 0; i < nel; i++) for (int k = 0; k < 3; k++) @@ -812,88 +823,89 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i); } - // calculation of myG_J which will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold + // calculation of myG_J which will be used to represent + // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The value of + // P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and + // this is what myL_J will hold for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) { myG_J[a] = (P.G[iat] - myG_temp[a]); myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]); } - //possibly replace wit BLAS calls + // possibly replace wit BLAS calls for (int i = 0; i < nel; i++) for (int j = 0; j < nmo; j++) Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j); - 
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2 const ValueType* const A(psiM_all.data()); const ValueType* const Ainv(psiM_inv.data()); const ValueType* const B(Bbar.data()); - SPOSet::ValueMatrix T; - SPOSet::ValueMatrix Y1; - SPOSet::ValueMatrix Y2; - SPOSet::ValueMatrix Y3; - SPOSet::ValueMatrix Y4; - T.resize(nel, nmo); + ValueMatrix t; + ValueMatrix Y1; + ValueMatrix Y2; + ValueMatrix Y3; + ValueMatrix Y4; + t.resize(nel, nmo); Y1.resize(nel, nel); Y2.resize(nel, nmo); Y3.resize(nel, nmo); Y4.resize(nel, nmo); - - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), t.data(), nmo); BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), T.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo); BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y3.data(), nmo); - //possibly replace with BLAS call + // possibly replace with BLAS call Y4 = Y3 - Y2; for (int i = 0; i < m_act_rot_inds.size(); i++) { - int kk = myVars.where(i); + int kk = this->myVars.where(i); if (kk >= 0) { const int p = m_act_rot_inds.at(i).first; const int q = m_act_rot_inds.at(i).second; - dlogpsi[kk] += T(p, q); + dlogpsi[kk] += t(p, q); dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q); } } } -void RotatedSPOs::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - 
const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) { bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) + for (int k = 0; k < this->myVars.size(); ++k) { - int kk = myVars.where(k); + int kk = this->myVars.where(k); if (kk < 0) continue; if (optvars.recompute(kk)) @@ -901,8 +913,8 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, } if (recalculate) { - ParticleSet::ParticleGradient myG_temp, myG_J; - ParticleSet::ParticleLaplacian myL_temp, myL_J; + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; const int NP = P.getTotalNum(); myG_temp.resize(NP); myG_temp = 0.0; @@ -915,13 +927,13 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, const size_t nmo = Phi->getOrbitalSetSize(); const size_t nel = P.last(0) - P.first(0); - const RealType* restrict C_p = Coeff.data(); + const T* restrict C_p = Coeff.data(); for (int i = 0; i < 
Coeff.size(); i++) { - const size_t upC = C2node_up[i]; - const size_t dnC = C2node_dn[i]; - const ValueType tmp1 = C_p[i] * detValues_dn[dnC]; - const ValueType tmp2 = C_p[i] * detValues_up[upC]; + const size_t upC = C2node_up[i]; + const size_t dnC = C2node_dn[i]; + const T tmp1 = C_p[i] * detValues_dn[dnC]; + const T tmp2 = C_p[i] * detValues_up[upC]; for (size_t k = 0, j = N1; k < NP1; k++, j++) { myG_temp[j] += tmp1 * grads_up(upC, k); @@ -937,43 +949,44 @@ void RotatedSPOs::evaluateDerivatives(ParticleSet& P, myG_temp *= (1 / psiCurrent); myL_temp *= (1 / psiCurrent); - // calculation of myG_J which will be used to represent \frac{\nabla\psi_{J}}{\psi_{J}} - // calculation of myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} - // IMPORTANT NOTE: The value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J will hold + // calculation of myG_J which will be used to represent + // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to + // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE: The + // value of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 + // \psi}{\psi} and this is what myL_J will hold for (int iat = 0; iat < (myL_temp.size()); iat++) { myG_J[iat] = (P.G[iat] - myG_temp[iat]); myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]); } - table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl); } } - -void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const 
ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { bool recalculate(false); - for (int k = 0; k < myVars.size(); ++k) + for (int k = 0; k < this->myVars.size(); ++k) { - int kk = myVars.where(k); + int kk = this->myVars.where(k); if (kk < 0) continue; if (optvars.recompute(kk)) @@ -989,43 +1002,46 @@ void RotatedSPOs::evaluateDerivativesWF(ParticleSet& P, } } -void RotatedSPOs::table_method_eval(Vector& dlogpsi, - Vector& dhpsioverpsi, - const ParticleSet::ParticleLaplacian& myL_J, - const ParticleSet::ParticleGradient& myG_J, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::table_method_eval(Vector& dlogpsi, + Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& 
myG_J, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GUIDE TO THE MATICES BEING BUILT ---------------------------------------------- -The idea here is that there is a loop over all unique determinants. For each determiant the table method is employed to calculate the contributions to the parameter derivatives (dhpsioverpsi/dlogpsi) +The idea here is that there is a loop over all unique determinants. For each +determiant the table method is employed to calculate the contributions to the +parameter derivatives (dhpsioverpsi/dlogpsi) - loop through unquie determinants + loop through unquie determinants loop through parameters evaluate contributaion to dlogpsi and dhpsioverpsi -\noindent +\noindent BLAS GUIDE for matrix multiplication of [ alpha * A.B + beta * C = C ] Matrix A is of dimensions a1,a2 and Matrix B is b1,b2 in which a2=b1 @@ -1033,7 +1049,8 @@ The idea here is that there is a loop over all unique determinants. For each det BLAS::gemm('N','N', b2, a1, a2 ,alpha, B, b2, A, a2, beta, C, b2); -Below is a human readable format for the matrix multiplications performed below... +Below is a human readable format for the matrix multiplications performed +below... 
This notation is inspired by http://dx.doi.org/10.1063/1.4948778 \newline @@ -1041,35 +1058,45 @@ This notation is inspired by http://dx.doi.org/10.1063/1.4948778 $ A_{i,j}=\phi_j(r_{i}) \\ T = A^{-1} \widetilde{A} \\ - B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\ + B_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla +\phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) \\ \hat{O_{I}} = \hat{O}D_{I} \\ - D_{I}=det(A_{I}) \newline + D_{I}=det(A_{I}) \newline \psi_{MS} = \sum_{I=0} C_{I} D_{I\uparrow}D_{I\downarrow} \\ \Psi_{total} = \psi_{J}\psi_{MS} \\ \alpha_{I} = P^{T}_{I}TQ_{I} \\ - M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\ + M_{I} = P^{T}_{I} \widetilde{M} Q_{I} = P^{T}_{I} (A^{-1}\widetilde{B} - +A^{-1} B A^{-1}\widetilde{A} )Q_{I} \\ $ \newline There are three constants I use in the expressions for dhpsioverpsi and dlogpsi \newline \hfill\break $ - const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1} C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ - const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})* Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\ + const0 = C_{0}*det(A_{0\downarrow})+\sum_{I=1} C_{I}*det(A_{I\downarrow})* +det(\alpha_{I\uparrow}) \\ + const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{I=1} +C_{I}*\hat{O}det(A_{I\downarrow})* det(\alpha_{I\uparrow}) \\ + const2 = \sum_{I=1} C_{I}*det(A_{I\downarrow})* +Tr[\alpha_{I}^{-1}M_{I}]*det(\alpha_{I}) \\ $ \newline -Below is a translation of the shorthand I use to represent matrices independent of ``excitation matrix". -\newline -\hfill\break +Below is a translation of the shorthand I use to represent matrices independent +of ``excitation matrix". 
\newline \hfill\break $ Y_{1} = A^{-1}B \\ Y_{2} = A^{-1}BA^{-1}\widetilde{A} \\ Y_{3} = A^{-1}\widetilde{B} \\ - Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} )\\ + Y_{4} = \widetilde{M} = (A^{-1}\widetilde{B} - A^{-1} B A^{-1}\widetilde{A} +)\\ $ \newline -Below is a translation of the shorthand I use to represent matrices dependent on ``excitation" with respect to the reference Matrix and sums of matrices. Above this line I have represented these excitation matrices with a subscript ``I" but from this point on The subscript will be omitted and it is clear that whenever a matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix. +Below is a translation of the shorthand I use to represent matrices dependent on +``excitation" with respect to the reference Matrix and sums of matrices. Above +this line I have represented these excitation matrices with a subscript ``I" but +from this point on The subscript will be omitted and it is clear that whenever a +matrix depends on $P^{T}_I$ and $Q_{I}$ that this is an excitation matrix. The +reference matrix is always $A_{0}$ and is always the Hartree Fock Matrix. \newline \hfill\break $ @@ -1083,49 +1110,74 @@ Below is a translation of the shorthand I use to represent matrices dependent on Y_{26} = \alpha_{I}^{-1}P^{T}\widetilde{M}Q\alpha_{I}^{-1}P^{T}\\ $ \newline -So far you will notice that I have not included up or down arrows to specify what spin the matrices are of. This is because we are calculating the derivative of all up or all down spin orbital rotation parameters at a time. If we are finding the up spin derivatives then any term that is down spin will be constant. The following assumes that we are taking up-spin MO rotation parameter derivatives. Of course the down spin expression can be retrieved by swapping the up and down arrows. 
I have dubbed any expression with lowercase p prefix as a "precursor" to an expression actually used... -\newline -\hfill\break +So far you will notice that I have not included up or down arrows to specify +what spin the matrices are of. This is because we are calculating the derivative +of all up or all down spin orbital rotation parameters at a time. If we are +finding the up spin derivatives then any term that is down spin will be +constant. The following assumes that we are taking up-spin MO rotation parameter +derivatives. Of course the down spin expression can be retrieved by swapping the +up and down arrows. I have dubbed any expression with lowercase p prefix as a +"precursor" to an expression actually used... \newline \hfill\break $ \dot{C_{I}} = C_{I}*det(A_{I\downarrow})\\ \ddot{C_{I}} = C_{I}*\hat{O}det(A_{I\downarrow}) \\ - pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}) \\ + pK1 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] +(Q\alpha_{I}^{-1}P^{T}) \\ pK2 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ pK3 = \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ pK4 = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}) \\ - pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T}) \\ + pK5 = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} +\alpha_{I}^{-1}P^{T}) \\ $ \newline Now these p matrices will be used to make various expressions via BLAS commands. 
\newline \hfill\break $ - K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\ - TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ - K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ - K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ - TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\ - K2T = \frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ - MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\ - K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ + K1T = const0^{-1}*pK1.T =const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +Tr[\alpha_{I}^{-1}M_{I}] (Q\alpha_{I}^{-1}P^{T}T) \\ + TK1T = T.K1T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +Tr[\alpha_{I}^{-1}M_{I}] (TQ\alpha_{I}^{-1}P^{T}T)\\ \\ + K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +(Q\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ + TK2AiB = T.K2AiB = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +(TQ\alpha_{I}^{-1}P^{T}A^{-1}\widetilde{B})\\ + K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +(Q\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ + TK2XA = T.K2XA = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) +(TQ\alpha_{I}^{-1}P^{T}X\widetilde{A})\\ \\ + K2T = \frac{const1}{const0^{2}} \sum_{I=1} 
\dot{C_{I}} det(\alpha_{I}) +(Q\alpha_{I}^{-1}P^{T}T) \\ + TK2T = T.K2T =\frac{const1}{const0^{2}} \sum_{I=1} \dot{C_{I}} +det(\alpha_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ + MK2T = \frac{const0}{const1} Y_{4}.K2T= const0^{-1} \sum_{I=1} \dot{C_{I}} +det(\alpha_{I}) (\widetilde{M}Q\alpha_{I}^{-1}P^{T}T)\\ \\ + K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) +(Q\alpha_{I}^{-1}P^{T}T) \\ + TK3T = T.K3T = const0^{-1} \sum_{I=1} \ddot{C_{I}} det(\alpha_{I}) +(TQ\alpha_{I}^{-1}P^{T}T)\\ \\ K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (Q\alpha_{I}^{-1}P^{T}T) \\ - TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) \\ \\ - K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ - TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} M_{I} \alpha_{I}^{-1}P^{T} T) \\ + TK4T = T.K4T = \sum_{I=1} \dot{C_{I}} det(A_{I}) (TQ\alpha_{I}^{-1}P^{T}T) +\\ \\ + K5T = const0^{-1} \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (Q\alpha_{I}^{-1} +M_{I} \alpha_{I}^{-1}P^{T} T) \\ + TK5T = T.K5T = \sum_{I=1} \dot{C_{I}} det(\alpha_{I}) (T Q\alpha_{I}^{-1} +M_{I} \alpha_{I}^{-1}P^{T} T) \\ $ \newline -Now with all these matrices and constants the expressions of dhpsioverpsi and dlogpsi can be created. +Now with all these matrices and constants the expressions of dhpsioverpsi and +dlogpsi can be created. -In addition I will be using a special generalization of the kinetic operator which I will denote as O. Our Slater matrix with the special O operator applied to each element will be called B_bar +In addition I will be using a special generalization of the kinetic operator +which I will denote as O. 
Our Slater matrix with the special O operator applied +to each element will be called B_bar $ -``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla \phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) +``Bbar"_{i,j} =\nabla^2 \phi_{j}(r_i) + \frac{\nabla_{i}J}{J} \cdot \nabla +\phi_{j}(r_{i}) + \frac{\nabla^2_i J}{J} \phi_{j}(r_{i}) $ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ { @@ -1170,54 +1222,62 @@ In addition I will be using a special generalization of the kinetic operator whi const size_t num_unique_up_dets(detValues_up.size()); const size_t num_unique_dn_dets(detValues_dn.size()); - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); const size_t* restrict upC(C2node_up.data()); const size_t* restrict dnC(C2node_dn.data()); - //B_grad holds the gradient operator - //B_lapl holds the laplacian operator - //B_bar will hold our special O operator + // B_grad holds the gradient operator + // B_lapl holds the laplacian operator + // B_bar will hold our special O operator const int offset1(N1); const int offset2(N2); const int NPother(NP2); - RealType* T(Table.data()); + T* T_(Table.data()); - //possibly replace wit BLAS calls + // possibly replace wit BLAS calls for (int i = 0; i < nel; i++) for (int j = 0; j < nmo; j++) - Bbar(i, j) = B_lapl(i, j) + 2 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); - - const RealType* restrict B(Bbar.data()); - const RealType* restrict A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - BLAS::gemm('N', 'N', nel, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), T, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); - - //possibly replace with BLAS call + Bbar(i, j) = B_lapl(i, j) + 2.0 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j); + + const T* restrict B(Bbar.data()); + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. 
NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); + + BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo); + + // possibly replace with BLAS call Y4 = Y3 - Y2; - //Need to create the constants: (Oi, const0, const1, const2)to take advantage of minimal BLAS commands; - //Oi is the special operator applied to the slater matrix "A subscript i" from the total CI expansion - //\hat{O_{i}} = \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as \sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} + // Need to create the constants: (Oi, const0, const1, const2)to take + // advantage of minimal BLAS commands; Oi is the special operator applied to + // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}} + //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as + //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow} std::vector Oi(num_unique_dn_dets); for (int index = 0; index < num_unique_dn_dets; index++) for (int iat = 0; iat < NPother; iat++) - Oi[index] += lapls_dn(index, iat) + 2 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + + Oi[index] += lapls_dn(index, iat) + 2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) + myL_J[offset2 + iat] * detValues_dn[index]; - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const1 = C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) - //const2 = \sum_{i=1} 
C_{i}*det(A_{i\downarrow})* Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 = + // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 = + // \sum_{i=1} C_{i}*det(A_{i\downarrow})* + // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i}) RealType const0(0.0), const1(0.0), const2(0.0); for (size_t i = 0; i < nc; ++i) { @@ -1234,10 +1294,12 @@ In addition I will be using a special generalization of the kinetic operator whi std::fill(pK3.begin(), pK3.end(), 0.0); std::fill(pK5.begin(), pK5.end(), 0.0); - //Now we are going to loop through all unique determinants. - //The few lines above are for the reference matrix contribution. - //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. 
+ // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method const int* restrict data_it = detData_up.data(); for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { @@ -1250,28 +1312,29 @@ In addition I will be using a special generalization of the kinetic operator whi else { - //Number of rows and cols of P^T + // Number of rows and cols of P^T const int prows = k; const int pcols = nel; - //Number of rows and cols of Q + // Number of rows and cols of Q const int qrows = nmo; const int qcols = k; Y5.resize(nel, k); Y6.resize(k, k); - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication of T*Q + // = Y5 std::fill(Y5.begin(), Y5.end(), 0.0); for (int i = 0; i < k; i++) { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); } std::fill(Y6.begin(), Y6.end(), 0.0); @@ -1280,8 
+1343,7 @@ In addition I will be using a special generalization of the kinetic operator whi BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); } - - Vector WS; + Vector WS; Vector Piv; WS.resize(k); Piv.resize(k); @@ -1309,7 +1371,6 @@ In addition I will be using a special generalization of the kinetic operator whi BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k); BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k); - Y26.resize(k, nel); std::fill(Y26.begin(), Y26.end(), 0.0); @@ -1318,7 +1379,6 @@ In addition I will be using a special generalization of the kinetic operator whi BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel); } - Y7.resize(k, nel); std::fill(Y7.begin(), Y7.end(), 0.0); @@ -1337,7 +1397,9 @@ In addition I will be using a special generalization of the kinetic operator whi for (int p = 0; p < lookup_tbl[index].size(); p++) { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current unique determinant + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values associated with the + // current unique determinant const int el_p(lookup_tbl[index][p]); const RealType c = cptr[el_p]; const size_t up = upC[el_p]; @@ -1361,82 +1423,81 @@ In addition I will be using a special generalization of the kinetic operator whi } } - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T, nmo, RealType(0.0), TK1T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo); 
BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T, nmo, RealType(0.0), TK2AiB.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo); BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T, nmo, RealType(0.0), TK2XA.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), + BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T, nmo, RealType(0.0), TK2T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo); BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T, nmo, RealType(0.0), TK3T.data(), nmo); - - BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T, nmo, RealType(0.0), TK5T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / 
const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo); for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) { - int kk = myVars.where(k); + int kk = this->myVars.where(k); if (kk >= 0) { const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); if (i <= nel - 1 && j > nel - 1) { dhpsioverpsi[kk] += - ValueType(-0.5 * Y4(i, j) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + - K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + - K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); + T(-0.5 * Y4(i, j) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) + + K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) - + const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) + + K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j))); } else if (i <= nel - 1 && j <= nel - 1) { - dhpsioverpsi[kk] += ValueType( - -0.5 * (Y4(i, j) - Y4(j, i)) - - 0.5 * - (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + - TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + - K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + K3T(i, j) - - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); + dhpsioverpsi[kk] += + T(-0.5 * (Y4(i, j) - Y4(j, i)) - + 0.5 * + (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) + + 
TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) + + K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) + + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) + + K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i))); } else { - dhpsioverpsi[kk] += ValueType(-0.5 * - (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + dhpsioverpsi[kk] += T(-0.5 * + (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i) - + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + - const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); + + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + + K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i))); } } } } -void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, - const size_t nel, - const size_t nmo, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) +template +void RotatedSPOsT::table_method_evalWF(Vector& dlogpsi, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) { ValueMatrix Table; ValueMatrix Y5, Y6, Y7; @@ -1456,22 +1517,26 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, const size_t 
num_unique_up_dets(detValues_up.size()); const size_t num_unique_dn_dets(detValues_dn.size()); - const RealType* restrict cptr = Coeff.data(); - const size_t nc = Coeff.size(); + const T* restrict cptr = Coeff.data(); + const size_t nc = Coeff.size(); const size_t* restrict upC(C2node_up.data()); const size_t* restrict dnC(C2node_dn.data()); - RealType* T(Table.data()); - - const RealType* restrict A(M_up.data()); - const RealType* restrict Ainv(Minv_up.data()); - //IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR THIS CASE - // The T matrix should be calculated and stored for use - // T = A^{-1} \widetilde A - //REMINDER: that the ValueMatrix "matrix" stores data in a row major order and that BLAS commands assume column major - BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T, nmo); - - //const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) + T* T_(Table.data()); + + const T* restrict A(M_up.data()); + const T* restrict Ainv(Minv_up.data()); + // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF + // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR + // THIS CASE + // The T matrix should be calculated and stored for use + // T = A^{-1} \widetilde A + // REMINDER: that the ValueMatrix "matrix" stores data in a row major order + // and that BLAS commands assume column major + BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo); + + // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1} + // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) RealType const0(0.0), const1(0.0), const2(0.0); for (size_t i = 0; i < nc; ++i) { @@ -1484,10 +1549,12 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, std::fill(pK4.begin(), pK4.end(), 0.0); - //Now we are going to loop through all unique determinants. 
- //The few lines above are for the reference matrix contribution. - //Although I start the loop below from index 0, the loop only performs actions when the index is >= 1 - //the detData object contains all the information about the P^T and Q matrices (projection matrices) needed in the table method + // Now we are going to loop through all unique determinants. + // The few lines above are for the reference matrix contribution. + // Although I start the loop below from index 0, the loop only performs + // actions when the index is >= 1 the detData object contains all the + // information about the P^T and Q matrices (projection matrices) needed in + // the table method const int* restrict data_it = detData_up.data(); for (int index = 0, datum = 0; index < num_unique_up_dets; index++) { @@ -1500,28 +1567,29 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, else { - //Number of rows and cols of P^T + // Number of rows and cols of P^T const int prows = k; const int pcols = nel; - //Number of rows and cols of Q + // Number of rows and cols of Q const int qrows = nmo; const int qcols = k; Y5.resize(nel, k); Y6.resize(k, k); - //Any matrix multiplication of P^T or Q is simply a projection - //Explicit matrix multiplication can be avoided; instead column or row copying can be done - //BlAS::copy(size of col/row being copied, - // Matrix pointer + place to begin copying, - // storage spacing (number of elements btw next row/col element), - // Pointer to resultant matrix + place to begin pasting, - // storage spacing of resultant matrix) - //For example the next 4 lines is the matrix multiplication of T*Q = Y5 + // Any matrix multiplication of P^T or Q is simply a projection + // Explicit matrix multiplication can be avoided; instead column or + // row copying can be done BlAS::copy(size of col/row being copied, + // Matrix pointer + place to begin copying, + // storage spacing (number of elements btw next row/col + // element), Pointer to resultant matrix + place 
to begin + // pasting, storage spacing of resultant matrix) + // For example the next 4 lines is the matrix multiplication of T*Q + // = Y5 std::fill(Y5.begin(), Y5.end(), 0.0); for (int i = 0; i < k; i++) { - BLAS::copy(nel, T + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); + BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k); } std::fill(Y6.begin(), Y6.end(), 0.0); @@ -1530,7 +1598,7 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1); } - Vector WS; + Vector WS; Vector Piv; WS.resize(k); Piv.resize(k); @@ -1547,7 +1615,9 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, for (int p = 0; p < lookup_tbl[index].size(); p++) { - //el_p is the element position that contains information about the CI coefficient, and det up/dn values associated with the current unique determinant + // el_p is the element position that contains information about + // the CI coefficient, and det up/dn values associated with the + // current unique determinant const int el_p(lookup_tbl[index][p]); const RealType c = cptr[el_p]; const size_t up = upC[el_p]; @@ -1564,37 +1634,37 @@ void RotatedSPOs::table_method_evalWF(Vector& dlogpsi, } } - BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); - BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T, nmo, RealType(0.0), TK4T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo); + BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo); for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++) { - int kk = myVars.where(k); + int kk = this->myVars.where(k); if (kk >= 0) { const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second); if (i <= nel - 1 && j > nel - 1) { - 
dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - K4T(j, i) - TK4T(i, j))); + dlogpsi[kk] += + T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j))); } else if (i <= nel - 1 && j <= nel - 1) { - dlogpsi[kk] += ValueType(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + - (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); + dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) + + (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i))); } else { - dlogpsi[kk] += ValueType((K4T(i, j) - K4T(j, i))); + dlogpsi[kk] += T((K4T(i, j) - K4T(j, i))); } } } } - -std::unique_ptr RotatedSPOs::makeClone() const +template +std::unique_ptr> RotatedSPOsT::makeClone() const { - auto myclone = std::make_unique(my_name_, std::unique_ptr(Phi->makeClone())); + auto myclone = std::make_unique(SPOSetT::getName(), std::unique_ptr>(Phi->makeClone())); myclone->params = this->params; myclone->params_supplied = this->params_supplied; @@ -1607,73 +1677,79 @@ std::unique_ptr RotatedSPOs::makeClone() const return myclone; } -void RotatedSPOs::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const +template +void RotatedSPOsT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list); } -void RotatedSPOs::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const +template +void RotatedSPOsT::mw_evaluateValue(const 
RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list); } -void RotatedSPOs::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const +template +void RotatedSPOsT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list); } -void RotatedSPOs::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const +template +void RotatedSPOsT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); } -void RotatedSPOs::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const +template +void RotatedSPOsT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + 
int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads); } -void RotatedSPOs::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const +template +void RotatedSPOsT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); @@ -1681,47 +1757,61 @@ void RotatedSPOs::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLead spingrads); } -void RotatedSPOs::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void RotatedSPOsT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list); } -void RotatedSPOs::createResource(ResourceCollection& collection) const { Phi->createResource(collection); } +template +void RotatedSPOsT::createResource(ResourceCollection& 
collection) const +{ + Phi->createResource(collection); +} -void RotatedSPOs::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const +template +void RotatedSPOsT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.acquireResource(collection, phi_list); } -void RotatedSPOs::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const +template +void RotatedSPOsT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { auto phi_list = extractPhiRefList(spo_list); auto& leader = phi_list.getLeader(); leader.releaseResource(collection, phi_list); } -RefVectorWithLeader RotatedSPOs::extractPhiRefList(const RefVectorWithLeader& spo_list) +template +RefVectorWithLeader> RotatedSPOsT::extractPhiRefList(const RefVectorWithLeader>& spo_list) { - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader(); const auto nw = spo_list.size(); - RefVectorWithLeader phi_list(*spo_leader.Phi); + RefVectorWithLeader> phi_list(*spo_leader.Phi); phi_list.reserve(nw); for (int iw = 0; iw < nw; iw++) { - RotatedSPOs& rot = spo_list.getCastedElement(iw); + RotatedSPOsT& rot = spo_list.template getCastedElement(iw); phi_list.emplace_back(*rot.Phi); } return phi_list; } +// Class concrete types from ValueType +template class RotatedSPOsT; +template class RotatedSPOsT; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h new file mode 100644 index 0000000000..dd3680a349 --- /dev/null +++ b/src/QMCWaveFunctions/RotatedSPOsT.h @@ -0,0 +1,483 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. 
+// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers +// +// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley +// Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_ROTATEDSPOST_H +#define QMCPLUSPLUS_ROTATEDSPOST_H + +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +class RotatedSPOsT; +namespace testing +{ +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot); +std::vector>& getHistoryParams(RotatedSPOsT& rot); +std::vector>& getHistoryParams(RotatedSPOsT& rot); +} // namespace testing + +template +class RotatedSPOsT : public SPOSetT, public OptimizableObjectT +{ +public: + using IndexType = typename SPOSetT::IndexType; + using RealType = typename SPOSetT::RealType; + using ValueType = typename SPOSetT::ValueType; + using FullValueType = typename SPOSetT::FullValueType; + using GradType = typename SPOSetT::GradType; + using ComplexType = typename SPOSetT::ComplexType; + using FullRealType = typename SPOSetT::FullRealType; + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using HessVector = typename SPOSetT::HessVector; + using HessMatrix = typename SPOSetT::HessMatrix; + using GGGVector = typename SPOSetT::GGGVector; + using GGGMatrix = typename SPOSetT::GGGMatrix; + using OffloadMWVGLArray = typename SPOSetT::OffloadMWVGLArray; + template + using OffloadMatrix = Matrix>; + + // constructor + RotatedSPOsT(const 
std::string& my_name, std::unique_ptr>&& spos); + // destructor + ~RotatedSPOsT() override; + + std::string getClassName() const override { return "RotatedSPOsT"; } + bool isOptimizable() const override { return true; } + bool isOMPoffload() const override { return Phi->isOMPoffload(); } + bool hasIonDerivs() const override { return Phi->hasIonDerivs(); } + + // Vector of rotation matrix indices + using RotationIndices = std::vector>; + + // Active orbital rotation parameter indices + RotationIndices m_act_rot_inds; + + // Full set of rotation values for global rotation + RotationIndices m_full_rot_inds; + + // Construct a list of the matrix indices for non-zero rotation parameters. + // (The structure for a sparse representation of the matrix) + // Only core->active rotations are created. + static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices); + + // Construct a list for all the matrix indices, including core->active, + // core->core and active->active + static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices); + + // Fill in antisymmetric matrix from the list of rotation parameter indices + // and a list of parameter values. + // This function assumes rot_mat is properly sized upon input and is set to + // zero. + static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices, + const std::vector& param, + ValueMatrix& rot_mat); + + // Extract the list of rotation parameters from the entries in an + // antisymmetric matrix This function expects rot_indices and param are the + // same length. + static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices, + const ValueMatrix& rot_mat, + std::vector& param); + + // function to perform orbital rotations + void apply_rotation(const std::vector& param, bool use_stored_copy); + + // For global rotation, inputs are the old parameters and the delta + // parameters. 
The corresponding rotation matrices are constructed, + // multiplied together, and the new parameters extracted. The new rotation + // is applied to the underlying SPO coefficients + void applyDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + std::vector& new_param); + + // Perform the construction of matrices and extraction of parameters for a + // delta rotation. Split out and made static for testing. + static void constructDeltaRotation(const std::vector& delta_param, + const std::vector& old_param, + const RotationIndices& act_rot_inds, + const RotationIndices& full_rot_inds, + std::vector& new_param, + ValueMatrix& new_rot_mat); + + // When initializing the rotation from VP files + // This function applies the rotation history + void applyRotationHistory(); + + // This function applies the global rotation (similar to apply_rotation, but + // for the full set of rotation parameters) + void applyFullRotation(const std::vector& full_param, bool use_stored_copy); + + // Compute matrix exponential of an antisymmetric matrix (result is rotation + // matrix) + static void exponentiate_antisym_matrix(ValueMatrix& mat); + + // Compute matrix log of rotation matrix to produce antisymmetric matrix + static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output); + + // A particular SPOSet used for Orbitals + std::unique_ptr> Phi; + + /// Set the rotation parameters (usually from input file) + void setRotationParameters(const std::vector& param_list); + + /// the number of electrons of the majority spin + size_t nel_major_; + + std::unique_ptr> makeClone() const override; + + // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the + // Determinant part of the wfn myG_J is the Gradient of the all other parts + // of the wavefunction (typically just the Jastrow). + // It represents \frac{\nabla\psi_{J}}{\psi_{J}} + // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . 
The + // Laplacian portion IMPORTANT NOTE: The value of P.L holds \nabla^2 + // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J + // will hold + typename ParticleSetT::ParticleGradient myG_temp, myG_J; + typename ParticleSetT::ParticleLaplacian myL_temp, myL_J; + + ValueMatrix Bbar; + ValueMatrix psiM_inv; + ValueMatrix psiM_all; + GradMatrix dpsiM_all; + ValueMatrix d2psiM_all; + + // Single Slater creation + void buildOptVariables(size_t nel); + + // For the MSD case rotations must be created in MultiSlaterDetTableMethod + // class + void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations); + + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) override; + + void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) override; + + void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const 
ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) override; + + // helper function to evaluatederivative; evaluate orbital rotation + // parameter derivative using table method + void table_method_eval(Vector& dlogpsi, + Vector& dhpsioverpsi, + const typename ParticleSetT::ParticleLaplacian& myL_J, + const typename ParticleSetT::ParticleGradient& myG_J, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); + + void table_method_evalWF(Vector& dlogpsi, + const size_t nel, + const size_t nmo, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl); + + void extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs) override { opt_obj_refs.push_back(*this); } + + void checkInVariablesExclusive(OptVariablesTypeT& active) override + { + if (this->myVars.size()) + active.insertFrom(this->myVars); + } + + void checkOutVariables(const OptVariablesTypeT& active) override { 
this->myVars.getIndex(active); } + + /// reset + void resetParametersExclusive(const OptVariablesTypeT& active) override; + + void writeVariationalParameters(hdf_archive& hout) override; + + void readVariationalParameters(hdf_archive& hin) override; + + //********************************************************************************* + // the following functions simply call Phi's corresponding functions + void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); } + + void checkObject() const override { Phi->checkObject(); } + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateValue(P, iat, psi); + } + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGL(P, iat, psi, dpsi, d2psi); + } + + void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) override + { + Phi->evaluateDetRatios(VP, psi, psiinv, ratios); + } + + void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) override; + + void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) override + { + assert(psi.size() <= this->OrbitalSetSize); + Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi); + } + + void evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) override + { + Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + 
ValueMatrix& d2logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet); + } + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) override + { + Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet); + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi); + } + + void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) override + { + Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi); + } + + // void evaluateThirdDeriv(const ParticleSet& P, int first, int last, + // GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first, + // last, grad_grad_grad_logdet); } + + /// Use history list (false) or global rotation (true) + void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; } + + void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const override; + + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const override; + + void mw_evaluateVGL(const 
RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const override; + + void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const override; + + void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const override; + + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void createResource(ResourceCollection& collection) const override; + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; + +private: + /// true if SPO parameters (orbital rotation parameters) have been supplied + /// by input + bool params_supplied; + /// list of supplied orbital rotation parameters + std::vector params; + + /// Full set of rotation matrix parameters for use in global rotation method + OptVariablesTypeT myVarsFull; + + /// List of previously applied parameters + std::vector> history_params_; + + /// Use global rotation or history list + bool 
use_global_rot_ = true; + + static RefVectorWithLeader> extractPhiRefList(const RefVectorWithLeader>& spo_list); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend OptVariablesTypeT& testing::getMyVarsFull(RotatedSPOsT& rot); + friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); + friend std::vector>& testing::getHistoryParams(RotatedSPOsT& rot); +}; + +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/SPOSet.cpp b/src/QMCWaveFunctions/SPOSet.cpp deleted file mode 100644 index dacfd4423b..0000000000 --- a/src/QMCWaveFunctions/SPOSet.cpp +++ /dev/null @@ -1,406 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign -// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory -// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign -// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SPOSet.h" -#include "Message/Communicate.h" -#include "Numerics/MatrixOperators.h" -#include "OhmmsData/AttributeSet.h" -#include "CPU/SIMD/inner_product.hpp" -#include "Utilities/ProgressReportEngine.h" -#include "hdf/hdf_archive.h" -#include - -namespace qmcplusplus -{ -SPOSet::SPOSet(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0) {} - -void SPOSet::extractOptimizableObjectRefs(UniqueOptObjRefs&) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::extractOptimizableObjectRefs " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::checkOutVariables(const opt_variables_type& active) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::checkOutVariables " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios) -{ - assert(psi.size() == psiinv.size()); - for (int iat = 0; iat < VP.getTotalNum(); ++iat) - { - evaluateValue(VP, iat, psi); - ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); - } -} - -void SPOSet::mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - { - Vector invRow(const_cast(invRow_ptr_list[iw]), psi_list[iw].get().size()); - spo_list[iw].evaluateDetRatios(vp_list[iw], psi_list[iw], invRow, ratios_list[iw]); - } -} - -void SPOSet::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, 
- GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin"); -} - -void SPOSet::mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]); -} - -void SPOSet::mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]); -} - -void SPOSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const -{ - throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. 
\n"); -} - -void SPOSet::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const -{ - assert(this == &spo_list.getLeader()); - assert(phi_vgl_v.size(0) == DIM_VGL); - assert(phi_vgl_v.size(1) == spo_list.size()); - const size_t nw = spo_list.size(); - const size_t norb_requested = phi_vgl_v.size(2); - GradVector dphi_v(norb_requested); - for (int iw = 0; iw < nw; iw++) - { - ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested); - ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested); - spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v); - - ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested); - grads[iw] = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw]; - - // transpose the array of gradients to SoA in phi_vgl_v - for (size_t idim = 0; idim < DIM; idim++) - { - ValueType* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0); - for (size_t iorb = 0; iorb < norb_requested; iorb++) - phi_g[iorb] = dphi_v[iorb][idim]; - } - } - phi_vgl_v.updateTo(); -} - -void SPOSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n"); -} - -void SPOSet::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluateThirdDeriv(). 
\n"); -} - -void SPOSet::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n"); -} - -void SPOSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const -{ - assert(this == &spo_list.getLeader()); - for (int iw = 0; iw < spo_list.size(); iw++) - spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]); -} - -void SPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_logdet. \n"); -} - -void SPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet) -{ - throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for grad_grad_grad_logdet. 
\n"); -} - - -std::unique_ptr SPOSet::makeClone() const -{ - throw std::runtime_error("Missing SPOSet::makeClone for " + getClassName()); -} - -void SPOSet::basic_report(const std::string& pad) const -{ - app_log() << pad << "size = " << size() << std::endl; - app_log() << pad << "state info:" << std::endl; - //states.report(pad+" "); - app_log().flush(); -} - -void SPOSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n"); -} - -void SPOSet::evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n"); -} - -void SPOSet::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) -{ - if (isRotationSupported()) - throw std::logic_error("Bug!! " + getClassName() + - "::applyRotation " - "must be overloaded when the SPOSet supports rotation."); -} - -void SPOSet::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); -} - -void SPOSet::evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex) -{ - // Match the fallback in WaveFunctionComponent that evaluates just the ratios - evaluateDetRatios(VP, psi, psiinv, ratios); - - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivRatios " - "must be overloaded when the SPOSet is optimizable."); -} - - -/** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ -void SPOSet::evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateDerivatives " - "must be overloaded when the SPOSet is optimizable."); -} - -/** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ -void SPOSet::evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl) -{ - if (isOptimizable()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateDerivativesWF " - "must be overloaded when the SPOSet is optimizable."); -} - - -void SPOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! " + getClassName() + - "::evaluateGradSource " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi) -{ - if (hasIonDerivs()) - throw std::logic_error("Bug!! 
" + getClassName() + - "::evaluateGradSourceRow " - "must be overloaded when the SPOSet has ion derivatives."); -} - -void SPOSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) -{ - throw std::runtime_error("Need specialization of " + getClassName() + - "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n"); -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSet.h b/src/QMCWaveFunctions/SPOSet.h index 4f7c8e8175..0313f3ebfc 100644 --- a/src/QMCWaveFunctions/SPOSet.h +++ b/src/QMCWaveFunctions/SPOSet.h @@ -20,544 +20,11 @@ #ifndef QMCPLUSPLUS_SINGLEPARTICLEORBITALSETBASE_H #define QMCPLUSPLUS_SINGLEPARTICLEORBITALSETBASE_H -#include "OhmmsPETE/OhmmsArray.h" -#include "Particle/ParticleSet.h" -#include "Particle/VirtualParticleSet.h" -#include "QMCWaveFunctions/OrbitalSetTraits.h" -#include "OptimizableObject.h" -#include "OMPTarget/OffloadAlignedAllocators.hpp" -#include "DualAllocatorAliases.hpp" +#include "QMCWaveFunctions/SPOSetT.h" namespace qmcplusplus { -class ResourceCollection; - -class SPOSet; -namespace testing -{ -opt_variables_type& getMyVars(SPOSet& spo); -} - - -/** base class for Single-particle orbital sets - * - * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains - * a number of single-particle orbitals with capabilities of evaluating \f$ \psi_j({\bf r}_i)\f$ - */ -class SPOSet : public QMCTraits -{ -public: - using ValueVector = OrbitalSetTraits::ValueVector; - using ValueMatrix = OrbitalSetTraits::ValueMatrix; - using GradVector = OrbitalSetTraits::GradVector; - using GradMatrix = OrbitalSetTraits::GradMatrix; - using HessVector = OrbitalSetTraits::HessVector; - using HessMatrix = OrbitalSetTraits::HessMatrix; - using GGGVector = OrbitalSetTraits::GradHessVector; - using GGGMatrix = OrbitalSetTraits::GradHessMatrix; - using SPOMap = std::map>; - using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] - using OffloadMWVArray = Array>; // [walker, Orbs] - template - using 
OffloadMatrix = Matrix>; - - /** constructor */ - SPOSet(const std::string& my_name); - - /** destructor - * - * Derived class destructor needs to pay extra attention to freeing memory shared among clones of SPOSet. - */ - virtual ~SPOSet() = default; - - /** return the size of the orbital set - * Ye: this needs to be replaced by getOrbitalSetSize(); - */ - inline int size() const { return OrbitalSetSize; } - - /** print basic SPOSet information - */ - void basic_report(const std::string& pad = "") const; - - /** print SPOSet information - */ - virtual void report(const std::string& pad = "") const { basic_report(pad); } - - - /** return the size of the orbitals - */ - inline int getOrbitalSetSize() const { return OrbitalSetSize; } - - /// Query if this SPOSet is optimizable - virtual bool isOptimizable() const { return false; } - - /** extract underlying OptimizableObject references - * @param opt_obj_refs aggregated list of optimizable object references - */ - virtual void extractOptimizableObjectRefs(UniqueOptObjRefs& opt_obj_refs); - - /** check out variational optimizable variables - * @param active a super set of optimizable variables - */ - virtual void checkOutVariables(const opt_variables_type& active); - - /// Query if this SPOSet uses OpenMP offload - virtual bool isOMPoffload() const { return false; } - - /** Query if this SPOSet has an explicit ion dependence. returns true if it does. - */ - virtual bool hasIonDerivs() const { return false; } - - /// check a few key parameters before putting the SPO into a determinant - virtual void checkObject() const {} - - /// return true if this SPOSet can be wrappered by RotatedSPO - virtual bool isRotationSupported() const { return false; } - /// store parameters before getting destroyed by rotation. 
- virtual void storeParamsBeforeRotation() {} - /// apply rotation to all the orbitals - virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); - - /// Parameter derivatives of the wavefunction and the Laplacian of the wavefunction - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const int& FirstIndex, - const int& LastIndex); - - /// Parameter derivatives of the wavefunction - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - int FirstIndex, - int LastIndex); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivatives(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - Vector& dhpsioverpsi, - const ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const GradMatrix& grads_up, - const GradMatrix& grads_dn, - const ValueMatrix& lapls_up, - const ValueMatrix& lapls_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const GradMatrix& B_grad, - const ValueMatrix& B_lapl, - const std::vector& detData_up, - const size_t N1, - const size_t N2, - const size_t NP1, - const size_t NP2, - const std::vector>& lookup_tbl); - - /** Evaluate the derivative of the optimized orbitals with respect to the parameters - * this is used only for MSD, to be refined for better serving both single and multi SD - */ - virtual void evaluateDerivativesWF(ParticleSet& P, - const opt_variables_type& optvars, - Vector& dlogpsi, - const QTFull::ValueType& psiCurrent, - const std::vector& Coeff, - const std::vector& C2node_up, - const std::vector& 
C2node_dn, - const ValueVector& detValues_up, - const ValueVector& detValues_dn, - const ValueMatrix& M_up, - const ValueMatrix& M_dn, - const ValueMatrix& Minv_up, - const ValueMatrix& Minv_dn, - const std::vector& detData_up, - const std::vector>& lookup_tbl); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - * Ye: I prefer to remove this interface in the future. SPOSet builders need to handle the size correctly. - * It doesn't make sense allowing to set the value at any place in the code. - */ - virtual void setOrbitalSetSize(int norbs) = 0; - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - virtual void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) = 0; - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP - * @param VP virtual particle set - * @param psi values of the SPO, used as a scratch space if needed - * @param psiinv the row of inverse slater matrix corresponding to the particle moved virtually - * @param ratios return determinant ratios - */ - virtual void evaluateDetRatios(const VirtualParticleSet& VP, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios); - - - /// Determinant ratios and parameter derivatives of the wavefunction for virtual moves - virtual void evaluateDerivRatios(const VirtualParticleSet& VP, - const opt_variables_type& optvars, - ValueVector& psi, - const ValueVector& psiinv, - std::vector& ratios, - Matrix& dratios, - int FirstIndex, - int LastIndex); - - - /** evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param vp_list a list of virtual particle sets in a walker batch - * @param psi_list a list of values of the SPO, used as a scratch space if needed - * @param invRow_ptr_list a list of 
pointers to the rows of inverse slater matrix corresponding to the particles moved virtually - * @param ratios_list a list of returning determinant ratios - */ - virtual void mw_evaluateDetRatios(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& vp_list, - const RefVector& psi_list, - const std::vector& invRow_ptr_list, - std::vector>& ratios_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - */ - virtual void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) = 0; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradients of the SPO - */ - virtual void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin); - - /** evaluate the values this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - */ - virtual void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle 
- * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - */ - virtual void mw_evaluateVGL(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list) const; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin is a dual matrix of spin gradients [nw][norb] - * Note that the device side of mw_dspin is up to date - */ - virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. 
Device data of phi_vgl_v must be up-to-date upon return - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param psi_ratio_grads_v determinant ratio and grads of all the walkers - */ - virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads) const; - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. - * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - */ - virtual void evaluateVGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi); - - /** evaluate the values, gradients, 
hessians, and grad hessians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param grad_grad_psi hessians of the SPO - * @param grad_grad_grad_psi grad hessians of the SPO - */ - virtual void evaluateVGHGH(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - HessVector& grad_grad_psi, - GGGVector& grad_grad_grad_psi); - - /** evaluate the values of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - virtual void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi); - - /** evaluate the third derivatives of this single-particle orbital set - * @param P current ParticleSet - * @param first first particle - * @param last last particle - * @param grad_grad_grad_logdet third derivatives of the SPO - */ - virtual void evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param[in] P current ParticleSet - * @param[in] first starting index of the particles - * @param[in] last ending index of the particles - * @param[out] logdet determinant matrix to be inverted - * @param[out] dlogdet gradients - * @param[out] d2logdet laplacians - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) = 0; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param d2logdet 
laplacians - * @param dspinlogdet, spin gradients - * - * default implementation will abort for all SPOSets except SpinorSet - * - */ - virtual void evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet); - - virtual void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const; - - /** evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet); - - /** evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param grad_grad_logdet hessians - * @param grad_grad_grad_logdet third derivatives - * - */ - virtual void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - HessMatrix& grad_grad_logdet, - GGGMatrix& grad_grad_grad_logdet); - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the 
particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi); - - /** evaluate the gradients of values, gradients, laplacians of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients of values - * @param grad_grad_phi gradients of gradients - * @param grad_lapl_phi gradients of laplacians - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& grad_phi, - HessMatrix& grad_grad_phi, - GradMatrix& grad_lapl_phi); - - /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. - * - * @param[in] P particle set. - * @param[in] iel The electron at which to evaluate phi(r_iel) - * @param[in] source ion particle set. - * @param[in] iat_src ion ID w.r.t. which to take derivative. - * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). 
- * @return Void - */ - virtual void evaluateGradSourceRow(const ParticleSet& P, - int iel, - const ParticleSet& source, - int iat_src, - GradVector& gradphi); - - /** access the k point related to the given orbital */ - virtual PosType get_k(int orb) { return PosType(); } - - /** initialize a shared resource and hand it to collection - */ - virtual void createResource(ResourceCollection& collection) const {} - - /** acquire a shared resource from collection - */ - virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const {} - - /** return a shared resource to collection - */ - virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const {} - - /** make a clone of itself - * every derived class must implement this to have threading working correctly. - */ - [[noreturn]] virtual std::unique_ptr makeClone() const; - - /** Used only by cusp correction in AOS LCAO. - * Ye: the SoA LCAO moves all this responsibility to the builder. - * This interface should be removed with AoS. - */ - virtual bool transformSPOSet() { return true; } - - /** finalize the construction of SPOSet - * - * for example, classes serving accelerators may need to transfer data from host to device - * after the host side objects are built. 
- */ - virtual void finalizeConstruction() {} - - /// return object name - const std::string& getName() const { return my_name_; } - - /// return class name - virtual std::string getClassName() const = 0; - -protected: - /// name of the object, unique identifier - const std::string my_name_; - ///number of Single-particle orbitals - IndexType OrbitalSetSize; - /// Optimizable variables - opt_variables_type myVars; - - friend opt_variables_type& testing::getMyVars(SPOSet& spo); -}; - +using SPOSet = SPOSetT; using SPOSetPtr = SPOSet*; } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilder.cpp b/src/QMCWaveFunctions/SPOSetBuilder.cpp deleted file mode 100644 index 4264cb15c4..0000000000 --- a/src/QMCWaveFunctions/SPOSetBuilder.cpp +++ /dev/null @@ -1,182 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory -// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory -// -// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign -////////////////////////////////////////////////////////////////////////////////////// - - -#include "SPOSetBuilder.h" -#include "OhmmsData/AttributeSet.h" -#include - -#if !defined(QMC_COMPLEX) -#include "QMCWaveFunctions/RotatedSPOs.h" -#endif - -namespace qmcplusplus -{ -SPOSetBuilder::SPOSetBuilder(const std::string& type_name, Communicate* comm) - : MPIObjectBase(comm), legacy(true), type_name_(type_name) -{ - reserve_states(); -} - - -void SPOSetBuilder::reserve_states(int nsets) -{ - int sets_needed = nsets - states.size(); - if (sets_needed > 0) - for (int s = 0; s < sets_needed; ++s) - states.push_back(std::make_unique()); -} - - -std::unique_ptr SPOSetBuilder::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) -{ - myComm->barrier_and_abort("BasisSetBase::createSPOSet(cur,input_info) has not been implemented"); - return 0; -} - - -std::unique_ptr SPOSetBuilder::createSPOSet(xmlNodePtr cur) -{ - std::string spo_object_name; - std::string optimize("no"); - - OhmmsAttributeSet attrib; - attrib.add(spo_object_name, "id"); - attrib.add(spo_object_name, "name"); - attrib.add(optimize, "optimize"); - attrib.put(cur); - - app_summary() << std::endl; - app_summary() << " Single particle orbitals (SPO)" << std::endl; - app_summary() << " ------------------------------" << std::endl; - app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ - << " Builder class name: " << ClassName << std::endl; - app_summary() << std::endl; - - if (spo_object_name.empty()) - myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); - - // read specialized sposet construction requests - // and translate 
them into a set of orbital indices - SPOSetInputInfo input_info(cur); - - // process general sposet construction requests - // and preserve legacy interface - std::unique_ptr sposet; - - try - { - if (legacy && input_info.legacy_request) - sposet = createSPOSetFromXML(cur); - else - sposet = createSPOSet(cur, input_info); - } - catch (const UniformCommunicateError& ue) - { - myComm->barrier_and_abort(ue.what()); - } - - if (!sposet) - myComm->barrier_and_abort("SPOSetBuilder::createSPOSet sposet creation failed"); - - if (optimize == "rotation" || optimize == "yes") - { -#ifdef QMC_COMPLEX - app_error() << "Orbital optimization via rotation doesn't support complex wavefunction yet.\n"; - abort(); -#else - app_warning() << "Specifying orbital rotation via optimize tag is deprecated. Use the rotated_spo element instead" - << std::endl; - - sposet->storeParamsBeforeRotation(); - // create sposet with rotation - auto& sposet_ref = *sposet; - app_log() << " SPOSet " << sposet_ref.getName() << " is optimizable\n"; - if (!sposet_ref.isRotationSupported()) - myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" + - sposet_ref.getClassName() + "'."); - auto rot_spo = std::make_unique(sposet_ref.getName(), std::move(sposet)); - xmlNodePtr tcur = cur->xmlChildrenNode; - while (tcur != NULL) - { - std::string cname((const char*)(tcur->name)); - if (cname == "opt_vars") - { - std::vector params; - putContent(params, tcur); - rot_spo->setRotationParameters(params); - } - tcur = tcur->next; - } - sposet = std::move(rot_spo); -#endif - } - - if (sposet->getName().empty()) - app_warning() << "SPOSet object doesn't have a name." << std::endl; - if (!spo_object_name.empty() && sposet->getName() != spo_object_name) - app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name - << " object name: " << sposet->getName() << std::endl; - - sposet->checkObject(); - return sposet; -} - -std::unique_ptr SPOSetBuilder::createRotatedSPOSet(xmlNodePtr cur) -{ - std::string spo_object_name; - std::string method; - OhmmsAttributeSet attrib; - attrib.add(spo_object_name, "name"); - attrib.add(method, "method", {"global", "history"}); - attrib.put(cur); - - -#ifdef QMC_COMPLEX - myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet."); - return nullptr; -#else - std::unique_ptr sposet; - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "sposet") - { - sposet = createSPOSet(element); - } - }); - - if (!sposet) - myComm->barrier_and_abort("Rotated SPO needs an SPOset"); - - if (!sposet->isRotationSupported()) - myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet->getName() + "' of type '" + - sposet->getClassName() + "'."); - - sposet->storeParamsBeforeRotation(); - auto rot_spo = std::make_unique(spo_object_name, std::move(sposet)); - - if (method == "history") - rot_spo->set_use_global_rotation(false); - - processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { - if (cname == "opt_vars") - { - std::vector params; - putContent(params, element); - rot_spo->setRotationParameters(params); - } - }); - return rot_spo; -#endif -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilder.h b/src/QMCWaveFunctions/SPOSetBuilder.h index 8827df190e..6f500ba61f 100644 --- a/src/QMCWaveFunctions/SPOSetBuilder.h +++ b/src/QMCWaveFunctions/SPOSetBuilder.h @@ -21,71 +21,12 @@ #ifndef QMCPLUSPLUS_SPOSET_BUILDER_H #define QMCPLUSPLUS_SPOSET_BUILDER_H -#include -#include -#include -#include "Message/MPIObjectBase.h" -#include "QMCWaveFunctions/SPOSetInfo.h" -#include "QMCWaveFunctions/SPOSetInputInfo.h" -#include "QMCWaveFunctions/SPOSet.h" -#include "hdf/hdf_archive.h" +#include 
"Configuration.h" +#include "QMCWaveFunctions/SPOSetBuilderT.h" namespace qmcplusplus { -/** base class for the real SPOSet builder - * - * \warning { - * We have not quite figured out how to use real/complex efficiently. - * There are three cases we have to deal with - * - real basis functions and real coefficients - * - real basis functions and complex coefficients - * - complex basis functions and complex coefficients - * For now, we decide to keep both real and complex basis sets and expect - * the user classes {\bf KNOW} what they need to use. - * } - */ -class SPOSetBuilder : public QMCTraits, public MPIObjectBase -{ -public: - using indices_t = std::vector; - using energies_t = std::vector; - - /// whether implementation conforms only to legacy standard - bool legacy; - - /// state info of all possible states available in the basis - std::vector> states; - - SPOSetBuilder(const std::string& type_name, Communicate* comm); - virtual ~SPOSetBuilder() {} - - /// reserve space for states (usually only one set, multiple for e.g. spin dependent einspline) - void reserve_states(int nsets = 1); - - /// allow modification of state information - inline void modify_states(int index = 0) { states[index]->modify(); } - - /// clear state information - inline void clear_states(int index = 0) { states[index]->clear(); } - - /// create an sposet from xml and save the resulting SPOSet - std::unique_ptr createSPOSet(xmlNodePtr cur); - - /// create orbital rotation transformation from xml and save the resulting SPOSet - std::unique_ptr createRotatedSPOSet(xmlNodePtr cur); - - const std::string& getTypeName() const { return type_name_; } - -protected: - /// create an sposet from xml (legacy) - virtual std::unique_ptr createSPOSetFromXML(xmlNodePtr cur) = 0; - - /// create an sposet from a general xml request - virtual std::unique_ptr createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info); - - /// type name of the SPO objects built by this builder. 
- const std::string type_name_; -}; +using SPOSetBuilder = SPOSetBuilderT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactory.h b/src/QMCWaveFunctions/SPOSetBuilderFactory.h index 78db70ca76..be31b52a5e 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactory.h +++ b/src/QMCWaveFunctions/SPOSetBuilderFactory.h @@ -16,57 +16,11 @@ #ifndef QMCPLUSPLUS_BASISSETFACTORY_H #define QMCPLUSPLUS_BASISSETFACTORY_H -#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" -#include "QMCWaveFunctions/SPOSetBuilder.h" -#include "type_traits/template_types.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/SPOSetBuilderFactoryT.h" namespace qmcplusplus { -class SPOSetBuilderFactory : public MPIObjectBase -{ -public: - using SPOMap = SPOSet::SPOMap; - using PSetMap = std::map>; - - /** constructor - * \param comm communicator - * \param els reference to the electrons - * \param ions reference to the ions - */ - SPOSetBuilderFactory(Communicate* comm, ParticleSet& els, const PSetMap& psets); - - ~SPOSetBuilderFactory(); - - std::unique_ptr createSPOSetBuilder(xmlNodePtr rootNode); - - /** returns a named sposet from the pool - * only use in serial portion of execution - * ie during initialization prior to threaded code - */ - const SPOSet* getSPOSet(const std::string& name) const; - - void buildSPOSetCollection(xmlNodePtr cur); - - bool empty() const { return sposets.empty(); } - - /** add an SPOSet to sposets map. 
- * This is only used to handle legacy SPOSet input styles without using sposet_collection - */ - void addSPOSet(std::unique_ptr); - - SPOMap&& exportSPOSets() { return std::move(sposets); } - -private: - ///reference to the target particle - ParticleSet& targetPtcl; - - ///reference to the particle pool - const PSetMap& ptclPool; - - /// list of all sposets created by the builders of this factory - SPOMap sposets; - - static std::string basisset_tag; -}; +using SPOSetBuilderFactory = SPOSetBuilderFactoryT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactory.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp similarity index 57% rename from src/QMCWaveFunctions/SPOSetBuilderFactory.cpp rename to src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp index 25932eeb45..7745be4eea 100644 --- a/src/QMCWaveFunctions/SPOSetBuilderFactory.cpp +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp @@ -15,40 +15,57 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "SPOSetBuilderFactoryT.h" -#include "SPOSetBuilderFactory.h" -#include "SPOSetScanner.h" -#include "HarmonicOscillator/SHOSetBuilder.h" -#include "PlaneWave/PWOrbitalSetBuilder.h" #include "ModernStringUtils.hpp" -#include "ElectronGas/FreeOrbitalBuilder.h" +#include "QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h" +#include "QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h" +#include "QMCWaveFunctions/SPOSetScannerT.h" +#include "PlaneWave/PWOrbitalSetBuilder.h" #if OHMMS_DIM == 3 -#include "LCAO/LCAOrbitalBuilder.h" - -#if defined(QMC_COMPLEX) -#include "BsplineFactory/EinsplineSpinorSetBuilder.h" -#include "LCAO/LCAOSpinorBuilder.h" -#endif - +#include "QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h" #if defined(HAVE_EINSPLINE) -#include "BsplineFactory/EinsplineSetBuilder.h" 
+#include "QMCWaveFunctions/EinsplineSpinorSetBuilderT.h" #endif +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" #endif -#include "CompositeSPOSet.h" -#include "Utilities/ProgressReportEngine.h" -#include "Utilities/IteratorUtility.h" -#include "OhmmsData/AttributeSet.h" #include "Message/MPIObjectBase.h" - +#include "OhmmsData/AttributeSet.h" +#include "QMCWaveFunctions/CompositeSPOSetT.h" +#include "Utilities/IteratorUtility.h" +#include "Utilities/ProgressReportEngine.h" namespace qmcplusplus { -const SPOSet* SPOSetBuilderFactory::getSPOSet(const std::string& name) const +template +struct LCAOSpinorBuilderMaker +{ + template + std::unique_ptr> operator()(TArgs&&...) const + { + throw std::runtime_error("lcao spinors not compatible with non-complex value types"); + } +}; + +template +struct LCAOSpinorBuilderMaker> +{ + template + std::unique_ptr>> operator()(TArgs&&... args) const + { + return std::make_unique>>(std::forward(args)...); + } +}; + +template +const SPOSetT* SPOSetBuilderFactoryT::getSPOSet(const std::string& name) const { if (auto spoit = sposets.find(name); spoit == sposets.end()) { // keep this commented until legacy input styles are moved. - // In legacy input styles, this look up may fail and need to build SPOSet on the fly. + // In legacy input styles, this look up may fail and need to build + // SPOSetT on the fly. 
return nullptr; } else @@ -60,20 +77,26 @@ const SPOSet* SPOSetBuilderFactory::getSPOSet(const std::string& name) const * \param psi reference to the wavefunction * \param ions reference to the ions */ -SPOSetBuilderFactory::SPOSetBuilderFactory(Communicate* comm, ParticleSet& els, const PSetMap& psets) +template +SPOSetBuilderFactoryT::SPOSetBuilderFactoryT(Communicate* comm, ParticleSetT& els, const PSetMap& psets) : MPIObjectBase(comm), targetPtcl(els), ptclPool(psets) { - ClassName = "SPOSetBuilderFactory"; + ClassName = "SPOSetBuilderFactoryT"; } -SPOSetBuilderFactory::~SPOSetBuilderFactory() { DEBUG_MEMORY("SPOSetBuilderFactory::~SPOSetBuilderFactory"); } +template +SPOSetBuilderFactoryT::~SPOSetBuilderFactoryT() +{ + DEBUG_MEMORY("SPOSetBuilderFactoryT::~SPOSetBuilderFactoryT"); +} -std::unique_ptr SPOSetBuilderFactory::createSPOSetBuilder(xmlNodePtr rootNode) +template +std::unique_ptr> SPOSetBuilderFactoryT::createSPOSetBuilder(xmlNodePtr rootNode) { ReportEngine PRE(ClassName, "createSPOSetBuilder"); std::string sourceOpt("ion0"); - std::string type(""); - std::string name(""); + std::string type; + std::string name; OhmmsAttributeSet aAttrib; aAttrib.add(sourceOpt, "source"); aAttrib.add(type, "type"); @@ -85,26 +108,26 @@ std::unique_ptr SPOSetBuilderFactory::createSPOSetBuilder(xmlNode std::string type_in = type; type = lowerCase(type); - //when name is missing, type becomes the input + // when name is missing, type becomes the input if (name.empty()) name = type_in; - std::unique_ptr bb; + std::unique_ptr> bb; if (type == "composite") { app_log() << "Composite SPO set with existing SPOSets." 
<< std::endl; - bb = std::make_unique(myComm, *this); + bb = std::make_unique>(myComm, *this); } else if (type == "jellium" || type == "heg" || type == "free") { app_log() << "Free-particle SPO set" << std::endl; - bb = std::make_unique(targetPtcl, myComm, rootNode); + bb = std::make_unique>(targetPtcl, myComm, rootNode); } else if (type == "sho") { app_log() << "Harmonic Oscillator SPO set" << std::endl; - bb = std::make_unique(targetPtcl, myComm); + bb = std::make_unique>(targetPtcl, myComm); } else if (type == "PWBasis" || type == "PW" || type == "pw") { @@ -118,16 +141,20 @@ std::unique_ptr SPOSetBuilderFactory::createSPOSetBuilder(xmlNode { #ifdef QMC_COMPLEX app_log() << "Einspline Spinor Set\n"; - bb = std::make_unique(targetPtcl, ptclPool, myComm, rootNode); + // FIXME + bb = std::make_unique>(targetPtcl, ptclPool, myComm, rootNode); #else - PRE.error("Use of einspline spinors requires QMC_COMPLEX=1. Rebuild with this option"); + PRE.error("Use of einspline spinors requires QMC_COMPLEX=1. 
" + "Rebuild with this option"); #endif } else { #if defined(HAVE_EINSPLINE) - PRE << "EinsplineSetBuilder: using libeinspline for B-spline orbitals.\n"; - bb = std::make_unique(targetPtcl, ptclPool, myComm, rootNode); + PRE << "EinsplineSetBuilder: using libeinspline for B-spline " + "orbitals.\n"; + // FIXME + bb = std::make_unique>(targetPtcl, ptclPool, myComm, rootNode); #else PRE.error("Einspline is missing for B-spline orbitals", true); #endif @@ -135,34 +162,40 @@ std::unique_ptr SPOSetBuilderFactory::createSPOSetBuilder(xmlNode } else if (type == "molecularorbital" || type == "mo") { - ParticleSet* ions = nullptr; - //initialize with the source tag + ParticleSetT* ions = nullptr; + // initialize with the source tag auto pit(ptclPool.find(sourceOpt)); if (pit == ptclPool.end()) PRE.error("Missing basisset/@source.", true); else ions = pit->second.get(); if (targetPtcl.isSpinor()) -#ifdef QMC_COMPLEX - bb = std::make_unique(targetPtcl, *ions, myComm, rootNode); -#else - PRE.error("Use of lcao spinors requires QMC_COMPLEX=1. 
Rebuild with this option"); -#endif + { + try + { + bb = LCAOSpinorBuilderMaker{}(targetPtcl, *ions, myComm, rootNode); + } + catch (const std::exception& e) + { + PRE.error(e.what()); + } + } else - bb = std::make_unique(targetPtcl, *ions, myComm, rootNode); + bb = std::make_unique>(targetPtcl, *ions, myComm, rootNode); } -#endif //OHMMS_DIM==3 +#endif // OHMMS_DIM==3 PRE.flush(); if (!bb) - myComm->barrier_and_abort("SPOSetBuilderFactory::createSPOSetBuilder SPOSetBuilder creation failed."); + myComm->barrier_and_abort("SPOSetBuilderFactoryT::createSPOSetBuilder " + "SPOSetBuilderT creation failed."); - app_log() << " Created SPOSet builder named '" << name << "' of type " << type << std::endl; + app_log() << " Created SPOSetT builder named '" << name << "' of type " << type << std::endl; return bb; } - -void SPOSetBuilderFactory::buildSPOSetCollection(xmlNodePtr cur) +template +void SPOSetBuilderFactoryT::buildSPOSetCollection(xmlNodePtr cur) { std::string collection_name; std::string collection_type; @@ -181,7 +214,7 @@ void SPOSetBuilderFactory::buildSPOSetCollection(xmlNodePtr cur) app_summary() << " Name: " << collection_name << " Type input: " << collection_type << std::endl; app_summary() << std::endl; - // create the SPOSet builder + // create the SPOSetT builder auto bb = createSPOSetBuilder(cur); // going through a list of sposet entries @@ -189,31 +222,33 @@ void SPOSetBuilderFactory::buildSPOSetCollection(xmlNodePtr cur) processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { if (cname == "sposet") { - addSPOSet(std::unique_ptr(bb->createSPOSet(element))); + addSPOSet(std::unique_ptr>(bb->createSPOSet(element))); nsposets++; } if (cname == "rotated_sposet") { - addSPOSet(std::unique_ptr(bb->createRotatedSPOSet(element))); + addSPOSet(std::unique_ptr>(bb->createRotatedSPOSet(element))); nsposets++; } }); if (nsposets == 0) - myComm->barrier_and_abort("SPOSetBuilderFactory::buildSPOSetCollection no elements found"); + 
myComm->barrier_and_abort("SPOSetBuilderFactoryT::buildSPOSetCollection no " + "elements found"); // going through a list of spo_scanner entries processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { if (cname == "spo_scanner") if (myComm->rank() == 0) { - SPOSetScanner ascanner(sposets, targetPtcl, ptclPool); + SPOSetScannerT ascanner(sposets, targetPtcl, ptclPool); ascanner.put(element); } }); } -void SPOSetBuilderFactory::addSPOSet(std::unique_ptr spo) +template +void SPOSetBuilderFactoryT::addSPOSet(std::unique_ptr> spo) { if (spo->getName().empty()) myComm->barrier_and_abort("sposet created in sposet_collection must have a name!"); @@ -224,6 +259,20 @@ void SPOSetBuilderFactory::addSPOSet(std::unique_ptr spo) sposets.emplace(spo->getName(), std::move(spo)); } -std::string SPOSetBuilderFactory::basisset_tag = "basisset"; +template +std::string SPOSetBuilderFactoryT::basisset_tag = "basisset"; +#ifdef QMC_COMPLEX +#ifndef MIXED_PRECISION +template class SPOSetBuilderFactoryT>; +#else +template class SPOSetBuilderFactoryT>; +#endif +#else +#ifndef MIXED_PRECISION +template class SPOSetBuilderFactoryT; +#else +template class SPOSetBuilderFactoryT; +#endif +#endif } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h new file mode 100644 index 0000000000..65f09b509f --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h @@ -0,0 +1,73 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_BASISSETFACTORYT_H +#define QMCPLUSPLUS_BASISSETFACTORYT_H + +#include "QMCWaveFunctions/SPOSetBuilderT.h" +#include "QMCWaveFunctions/WaveFunctionComponentBuilder.h" +#include "type_traits/template_types.hpp" + +namespace qmcplusplus +{ +template +class SPOSetBuilderFactoryT : public MPIObjectBase +{ +public: + using SPOMap = typename SPOSetT::SPOMap; + using PSetMap = std::map>>; + + /** constructor + * \param comm communicator + * \param els reference to the electrons + * \param ions reference to the ions + */ + SPOSetBuilderFactoryT(Communicate* comm, ParticleSetT& els, const PSetMap& psets); + + ~SPOSetBuilderFactoryT(); + + std::unique_ptr> createSPOSetBuilder(xmlNodePtr rootNode); + + /** returns a named sposet from the pool + * only use in serial portion of execution + * ie during initialization prior to threaded code + */ + const SPOSetT* getSPOSet(const std::string& name) const; + + void buildSPOSetCollection(xmlNodePtr cur); + + bool empty() const { return sposets.empty(); } + + /** add an SPOSet to sposets map. 
+ * This is only used to handle legacy SPOSet input styles without using + * sposet_collection + */ + void addSPOSet(std::unique_ptr>); + + SPOMap&& exportSPOSets() { return std::move(sposets); } + +private: + /// reference to the target particle + ParticleSetT& targetPtcl; + + /// reference to the particle pool + const PSetMap& ptclPool; + + /// list of all sposets created by the builders of this factory + SPOMap sposets; + + static std::string basisset_tag; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.cpp b/src/QMCWaveFunctions/SPOSetBuilderT.cpp new file mode 100644 index 0000000000..b83c265af9 --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetBuilderT.cpp @@ -0,0 +1,449 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jaron T. 
Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#include "SPOSetBuilderT.h" +#include "OhmmsData/AttributeSet.h" +#include +#include "QMCWaveFunctions/RotatedSPOsT.h" // only for real wavefunctions + +namespace qmcplusplus +{ +template +SPOSetBuilderT::SPOSetBuilderT(const std::string& type_name, Communicate* comm) + : MPIObjectBase(comm), legacy(true), type_name_(type_name) +{ + reserve_states(); +} + +template +void SPOSetBuilderT::reserve_states(int nsets) +{ + int sets_needed = nsets - states.size(); + if (sets_needed > 0) + for (int s = 0; s < sets_needed; ++s) + states.push_back(std::make_unique()); +} + +template<> +std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string optimize("no"); + + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "id"); + attrib.add(spo_object_name, "name"); + attrib.add(optimize, "optimize"); + attrib.put(cur); + + app_summary() << std::endl; + app_summary() << " Single particle orbitals (SPO)" << std::endl; + app_summary() << " ------------------------------" << std::endl; + app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ + << " Builder class name: " << ClassName << std::endl; + app_summary() << std::endl; + + if (spo_object_name.empty()) + myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); + + // read specialized sposet construction requests + // and translate them into a set of orbital indices + SPOSetInputInfo input_info(cur); + + // process general sposet construction requests + // and preserve legacy interface + std::unique_ptr> sposet; 
+ + try + { + if (legacy && input_info.legacy_request) + sposet = createSPOSetFromXML(cur); + else + sposet = createSPOSet(cur, input_info); + } + catch (const UniformCommunicateError& ue) + { + myComm->barrier_and_abort(ue.what()); + } + + if (!sposet) + myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed"); + + if (optimize == "rotation" || optimize == "yes") + { + app_warning() << "Specifying orbital rotation via optimize tag is deprecated. Use the rotated_spo element instead" + << std::endl; + + sposet->storeParamsBeforeRotation(); + // create sposet with rotation + auto& sposet_ref = *sposet; + app_log() << " SPOSet " << sposet_ref.getName() << " is optimizable\n"; + if (!sposet_ref.isRotationSupported()) + myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" + + sposet_ref.getClassName() + "'."); + auto rot_spo = std::make_unique>(sposet_ref.getName(), std::move(sposet)); + xmlNodePtr tcur = cur->xmlChildrenNode; + while (tcur != NULL) + { + std::string cname((const char*)(tcur->name)); + if (cname == "opt_vars") + { + std::vector params; + putContent(params, tcur); + rot_spo->setRotationParameters(params); + } + tcur = tcur->next; + } + sposet = std::move(rot_spo); + } + + if (sposet->getName().empty()) + app_warning() << "SPOSet object doesn't have a name." << std::endl; + if (!spo_object_name.empty() && sposet->getName() != spo_object_name) + app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name + << " object name: " << sposet->getName() << std::endl; + + sposet->checkObject(); + return sposet; +} + +template<> +std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string optimize("no"); + + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "id"); + attrib.add(spo_object_name, "name"); + attrib.add(optimize, "optimize"); + attrib.put(cur); + + app_summary() << std::endl; + app_summary() << " Single particle orbitals (SPO)" << std::endl; + app_summary() << " ------------------------------" << std::endl; + app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ + << " Builder class name: " << ClassName << std::endl; + app_summary() << std::endl; + + if (spo_object_name.empty()) + myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); + + // read specialized sposet construction requests + // and translate them into a set of orbital indices + SPOSetInputInfo input_info(cur); + + // process general sposet construction requests + // and preserve legacy interface + std::unique_ptr> sposet; + + try + { + if (legacy && input_info.legacy_request) + sposet = createSPOSetFromXML(cur); + else + sposet = createSPOSet(cur, input_info); + } + catch (const UniformCommunicateError& ue) + { + myComm->barrier_and_abort(ue.what()); + } + + if (!sposet) + myComm->barrier_and_abort("SPOSetBuilderT::createSPOSet sposet creation failed"); + + if (optimize == "rotation" || optimize == "yes") + { + app_warning() << "Specifying orbital rotation via optimize tag is deprecated. 
Use the rotated_spo element instead" + << std::endl; + + sposet->storeParamsBeforeRotation(); + // create sposet with rotation + auto& sposet_ref = *sposet; + app_log() << " SPOSet " << sposet_ref.getName() << " is optimizable\n"; + if (!sposet_ref.isRotationSupported()) + myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet_ref.getName() + "' of type '" + + sposet_ref.getClassName() + "'."); + auto rot_spo = std::make_unique>(sposet_ref.getName(), std::move(sposet)); + xmlNodePtr tcur = cur->xmlChildrenNode; + while (tcur != NULL) + { + std::string cname((const char*)(tcur->name)); + if (cname == "opt_vars") + { + std::vector params; + putContent(params, tcur); + rot_spo->setRotationParameters(params); + } + tcur = tcur->next; + } + sposet = std::move(rot_spo); + } + + if (sposet->getName().empty()) + app_warning() << "SPOSet object doesn't have a name." << std::endl; + if (!spo_object_name.empty() && sposet->getName() != spo_object_name) + app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name + << " object name: " << sposet->getName() << std::endl; + + sposet->checkObject(); + return sposet; +} + +template<> +std::unique_ptr>> SPOSetBuilderT>::createSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string optimize("no"); + + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "id"); + attrib.add(spo_object_name, "name"); + attrib.add(optimize, "optimize"); + attrib.put(cur); + + app_summary() << std::endl; + app_summary() << " Single particle orbitals (SPO)" << std::endl; + app_summary() << " ------------------------------" << std::endl; + app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ + << " Builder class name: " << ClassName << std::endl; + app_summary() << std::endl; + + if (spo_object_name.empty()) + myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); + + // read specialized sposet construction requests + // and translate them into a set of orbital indices + SPOSetInputInfo input_info(cur); + + // process general sposet construction requests + // and preserve legacy interface + std::unique_ptr>> sposet; + + try + { + if (legacy && input_info.legacy_request) + sposet = createSPOSetFromXML(cur); + else + sposet = createSPOSet(cur, input_info); + } + catch (const UniformCommunicateError& ue) + { + myComm->barrier_and_abort(ue.what()); + } + + if (!sposet) + myComm->barrier_and_abort("SPOSetBuilderT>::createSPOSet sposet creation failed"); + + if (optimize == "rotation" || optimize == "yes") + { + app_error() << "Orbital optimization via rotation doesn't support complex wavefunction yet.\n"; + abort(); + } + + if (sposet->getName().empty()) + app_warning() << "SPOSet object doesn't have a name." << std::endl; + if (!spo_object_name.empty() && sposet->getName() != spo_object_name) + app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name + << " object name: " << sposet->getName() << std::endl; + + sposet->checkObject(); + return sposet; +} + +template<> +std::unique_ptr>> SPOSetBuilderT>::createSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string optimize("no"); + + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "id"); + attrib.add(spo_object_name, "name"); + attrib.add(optimize, "optimize"); + attrib.put(cur); + + app_summary() << std::endl; + app_summary() << " Single particle orbitals (SPO)" << std::endl; + app_summary() << " ------------------------------" << std::endl; + app_summary() << " Name: " << spo_object_name << " Type: " << type_name_ + << " Builder class name: " << ClassName << std::endl; + app_summary() << std::endl; + + if (spo_object_name.empty()) + myComm->barrier_and_abort("SPOSet object \"name\" attribute not given in the input!"); + + // read specialized sposet construction requests + // and translate them into a set of orbital indices + SPOSetInputInfo input_info(cur); + + // process general sposet construction requests + // and preserve legacy interface + std::unique_ptr>> sposet; + + try + { + if (legacy && input_info.legacy_request) + sposet = createSPOSetFromXML(cur); + else + sposet = createSPOSet(cur, input_info); + } + catch (const UniformCommunicateError& ue) + { + myComm->barrier_and_abort(ue.what()); + } + + if (!sposet) + myComm->barrier_and_abort("SPOSetBuilderT>::createSPOSet sposet creation failed"); + + if (optimize == "rotation" || optimize == "yes") + { + app_error() << "Orbital optimization via rotation doesn't support complex wavefunction yet.\n"; + abort(); + } + + if (sposet->getName().empty()) + app_warning() << "SPOSet object doesn't have a name." << std::endl; + if (!spo_object_name.empty() && sposet->getName() != spo_object_name) + app_warning() << "SPOSet object name mismatched! 
input name: " << spo_object_name + << " object name: " << sposet->getName() << std::endl; + + sposet->checkObject(); + return sposet; +} + +template +std::unique_ptr> SPOSetBuilderT::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info) +{ + myComm->barrier_and_abort("BasisSetBase::createSPOSet(cur,input_info) has not been implemented"); + return nullptr; +} + + +template<> +std::unique_ptr> SPOSetBuilderT::createRotatedSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string method; + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "name"); + attrib.add(method, "method", {"global", "history"}); + attrib.put(cur); + + std::unique_ptr> sposet; + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "sposet") + { + sposet = createSPOSet(element); + } + }); + + if (!sposet) + myComm->barrier_and_abort("Rotated SPO needs an SPOset"); + + if (!sposet->isRotationSupported()) + myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet->getName() + "' of type '" + + sposet->getClassName() + "'."); + + sposet->storeParamsBeforeRotation(); + auto rot_spo = std::make_unique>(spo_object_name, std::move(sposet)); + + if (method == "history") + rot_spo->set_use_global_rotation(false); + + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "opt_vars") + { + std::vector params; + putContent(params, element); + rot_spo->setRotationParameters(params); + } + }); + return rot_spo; +} + +template<> +std::unique_ptr> SPOSetBuilderT::createRotatedSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string method; + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "name"); + attrib.add(method, "method", {"global", "history"}); + attrib.put(cur); + + std::unique_ptr> sposet; + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "sposet") + { + sposet = createSPOSet(element); + } + }); + + if (!sposet) + 
myComm->barrier_and_abort("Rotated SPO needs an SPOset"); + + if (!sposet->isRotationSupported()) + myComm->barrier_and_abort("Orbital rotation not supported with '" + sposet->getName() + "' of type '" + + sposet->getClassName() + "'."); + + sposet->storeParamsBeforeRotation(); + auto rot_spo = std::make_unique>(spo_object_name, std::move(sposet)); + + if (method == "history") + rot_spo->set_use_global_rotation(false); + + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname == "opt_vars") + { + std::vector params; + putContent(params, element); + rot_spo->setRotationParameters(params); + } + }); + return rot_spo; +} + +template<> +std::unique_ptr>> SPOSetBuilderT>::createRotatedSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string method; + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "name"); + attrib.add(method, "method", {"global", "history"}); + attrib.put(cur); + myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet."); + return nullptr; +} + +template<> +std::unique_ptr>> SPOSetBuilderT>::createRotatedSPOSet(xmlNodePtr cur) +{ + std::string spo_object_name; + std::string method; + OhmmsAttributeSet attrib; + attrib.add(spo_object_name, "name"); + attrib.add(method, "method", {"global", "history"}); + attrib.put(cur); + myComm->barrier_and_abort("Orbital optimization via rotation doesn't support complex wavefunctions yet."); + return nullptr; +} + + +template class SPOSetBuilderT; +template class SPOSetBuilderT; +template class SPOSetBuilderT>; +template class SPOSetBuilderT>; +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetBuilderT.h b/src/QMCWaveFunctions/SPOSetBuilderT.h new file mode 100644 index 0000000000..1183a56ad3 --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetBuilderT.h @@ -0,0 +1,94 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the 
University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file SPOSetBuilderT.h + * @brief Declaration of a base class of SPOSet Builders + */ +#ifndef QMCPLUSPLUS_SPOSET_BUILDERT_H +#define QMCPLUSPLUS_SPOSET_BUILDERT_H + +#include +#include +#include +#include "Message/MPIObjectBase.h" +#include "QMCWaveFunctions/SPOSetInfo.h" +#include "QMCWaveFunctions/SPOSetInputInfo.h" +#include "QMCWaveFunctions/SPOSetT.h" +#include "hdf/hdf_archive.h" + +namespace qmcplusplus +{ +/** base class for the real SPOSet builder + * + * \warning { + * We have not quite figured out how to use real/complex efficiently. + * There are three cases we have to deal with + * - real basis functions and real coefficients + * - real basis functions and complex coefficients + * - complex basis functions and complex coefficients + * For now, we decide to keep both real and complex basis sets and expect + * the user classes {\bf KNOW} what they need to use. 
+ * } + */ +template +class SPOSetBuilderT : public QMCTraits, public MPIObjectBase +{ +public: + using PosType = typename SPOSetT::PosType; + using RealType = typename SPOSetT::RealType; + using indices_t = std::vector; + using energies_t = std::vector; + + /// whether implementation conforms only to legacy standard + bool legacy; + + /// state info of all possible states available in the basis + std::vector> states; + + SPOSetBuilderT(const std::string& type_name, Communicate* comm); + virtual ~SPOSetBuilderT() {} + + /// reserve space for states (usually only one set, multiple for e.g. spin dependent einspline) + void reserve_states(int nsets = 1); + + /// allow modification of state information + inline void modify_states(int index = 0) { states[index]->modify(); } + + /// clear state information + inline void clear_states(int index = 0) { states[index]->clear(); } + + /// create an sposet from xml and save the resulting SPOSet + [[nodiscard]] std::unique_ptr> createSPOSet(xmlNodePtr cur); + + /// create orbital rotation transformation from xml and save the resulting SPOSet + [[nodiscard]] std::unique_ptr> createRotatedSPOSet(xmlNodePtr cur); + + const std::string& getTypeName() const { return type_name_; } + +protected: + /// create an sposet from xml (legacy) + virtual std::unique_ptr> createSPOSetFromXML(xmlNodePtr cur) = 0; + + /// create an sposet from a general xml request + virtual std::unique_ptr> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input_info); + + /// type name of the SPO objects built by this builder. 
+ const std::string type_name_; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/SPOSetInfo.h b/src/QMCWaveFunctions/SPOSetInfo.h index 9653dc73f8..04961ebe63 100644 --- a/src/QMCWaveFunctions/SPOSetInfo.h +++ b/src/QMCWaveFunctions/SPOSetInfo.h @@ -129,7 +129,8 @@ class SPOSetInfo /// empty collection and render mutable void clear(); - friend class SPOSetBuilder; + template + friend class SPOSetBuilderT; }; diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h new file mode 100644 index 0000000000..914a8ff46c --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetScannerT.h @@ -0,0 +1,254 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPOSET_SCANNERT_H +#define QMCPLUSPLUS_SPOSET_SCANNERT_H + +#include "OhmmsData/AttributeSet.h" +#include "Particle/ParticleSet.h" +#include "QMCWaveFunctions/OrbitalSetTraits.h" +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +struct OutputReportMakerBase +{ + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + + const ValueVector& SPO_v_avg; + const ValueVector& SPO_l_avg; + const GradVector& SPO_g_avg; + int nknots; +}; + +template +struct OutputReportMaker : OutputReportMakerBase +{ + using RealType = typename SPOSetT::RealType; + + void operator()(std::ofstream& output_report) const + { + output_report << "# Report: Orb Value_avg Gradients_avg Laplacian_avg" << std::endl; + for (int iorb = 0; 
iorb < this->SPO_v_avg.size(); iorb++) + { + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_g_avg[iorb][0] * one_over_nknots << " " << this->SPO_g_avg[iorb][1] * one_over_nknots + << " " << this->SPO_g_avg[iorb][2] * one_over_nknots << " " + << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl; + } + } +}; + +template +struct OutputReportMaker> : OutputReportMakerBase> +{ + using RealType = typename SPOSetT::RealType; + + void operator()(std::ofstream& output_report) const + { + output_report << "# Report: Orb Value_avg I/R Gradients_avg Laplacian_avg" << std::endl; + for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) + { + auto one_over_nknots = static_cast(1.0 / this->nknots); + output_report << "\t" << iorb << " " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << " " + << this->SPO_v_avg[iorb].imag() / this->SPO_v_avg[iorb].real() << " " + << this->SPO_g_avg[iorb][0] * one_over_nknots << " " << this->SPO_g_avg[iorb][1] * one_over_nknots + << " " << this->SPO_g_avg[iorb][2] * one_over_nknots << " " + << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl; + } + } +}; + +/** a scanner for all the SPO sets. 
+ */ +template +class SPOSetScannerT +{ +public: + using PtclPool = std::map>>; + using SPOSetMap = typename SPOSetT::SPOMap; + using RealType = typename SPOSetT::RealType; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + using HessVector = typename SPOSetT::HessVector; + + RealType myfabs(RealType s) { return std::fabs(s); } + template + std::complex myfabs(std::complex& s) + { + return std::complex(myfabs(s.real()), myfabs(s.imag())); + } + template + TinyVector myfabs(TinyVector& s) + { + return TinyVector(myfabs(s[0]), myfabs(s[1]), myfabs(s[2])); + } + + const SPOSetMap& sposets; + ParticleSetT& target; + const PtclPool& ptcl_pool_; + ParticleSetT* ions; + + // construction/destruction + SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT& targetPtcl, const PtclPool& psets) + : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){}; + //~SPOSetScannerT(){}; + + // processing scanning + void put(xmlNodePtr cur) + { + app_log() << "Entering the SPO set scanner!" << std::endl; + // check in the source particle set and search for it in the pool. + std::string sourcePtcl("ion0"); + OhmmsAttributeSet aAttrib; + aAttrib.add(sourcePtcl, "source"); + aAttrib.put(cur); + auto pit(ptcl_pool_.find(sourcePtcl)); + if (pit == ptcl_pool_.end()) + app_log() << "Source particle set not found. Can not be used as " + "reference point." 
+ << std::endl; + else + ions = pit->second.get(); + + // scanning the SPO sets + xmlNodePtr cur_save = cur; + for (const auto& [name, sposet] : sposets) + { + app_log() << " Processing SPO " << sposet->getName() << std::endl; + // scanning the paths + cur = cur_save->children; + while (cur != NULL) + { + std::string trace_name("no name"); + OhmmsAttributeSet aAttrib; + aAttrib.add(trace_name, "name"); + aAttrib.put(cur); + std::string cname(getNodeName(cur)); + std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name); + if (cname == "path") + { + app_log() << " Scanning a " << cname << " called " << trace_name << " and writing to " + << prefix + "_v/g/l/report.dat" << std::endl; + auto spo = sposet->makeClone(); + scan_path(cur, *spo, prefix); + } + else + { + if (cname != "text" && cname != "comment") + app_log() << " Unknown type of scanning " << cname << std::endl; + } + cur = cur->next; + } + } + app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl; + } + + // scanning a path + void scan_path(xmlNodePtr cur, SPOSetT& sposet, std::string prefix) + { + std::string file_name; + file_name = prefix + "_v.dat"; + std::ofstream output_v(file_name.c_str()); + file_name = prefix + "_g.dat"; + std::ofstream output_g(file_name.c_str()); + file_name = prefix + "_l.dat"; + std::ofstream output_l(file_name.c_str()); + file_name = prefix + "_report.dat"; + std::ofstream output_report(file_name.c_str()); + + int nknots(2); + int from_atom(-1); + int to_atom(-1); + TinyVector from_pos(0.0, 0.0, 0.0); + TinyVector to_pos(0.0, 0.0, 0.0); + + OhmmsAttributeSet aAttrib; + aAttrib.add(nknots, "nknots"); + aAttrib.add(from_atom, "from_atom"); + aAttrib.add(to_atom, "to_atom"); + aAttrib.add(from_pos, "from_pos"); + aAttrib.add(to_pos, "to_pos"); + aAttrib.put(cur); + + // sanity check + if (nknots < 2) + nknots = 2; + // check out the reference atom coordinates + if (ions) + { + if (from_atom >= 0 && from_atom < ions->R.size()) + from_pos = 
ions->R[from_atom]; + if (to_atom >= 0 && to_atom < ions->R.size()) + to_pos = ions->R[to_atom]; + } + + // prepare a fake particle set + ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg; + GradVector SPO_g, SPO_g_avg; + int OrbitalSize(sposet.size()); + SPO_v.resize(OrbitalSize); + SPO_g.resize(OrbitalSize); + SPO_l.resize(OrbitalSize); + SPO_v_avg.resize(OrbitalSize); + SPO_g_avg.resize(OrbitalSize); + SPO_l_avg.resize(OrbitalSize); + SPO_v_avg = 0.0; + SPO_g_avg = 0.0; + SPO_l_avg = 0.0; + double Delta = 1.0 / (nknots - 1); + int elec_count = target.R.size(); + auto R_saved = target.R; + typename ParticleSetT::SingleParticlePos zero_pos(0.0, 0.0, 0.0); + for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) + { + if (ind == elec_count) + ind = 0; + target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0]; + target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1]; + target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2]; + target.makeMove(ind, zero_pos); + sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l); + std::ostringstream o; + o << "x_y_z " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " " + << target.R[ind][2]; + output_v << o.str() << " : " << std::scientific << std::setprecision(12); + output_g << o.str() << " : " << std::scientific << std::setprecision(12); + output_l << o.str() << " : " << std::scientific << std::setprecision(12); + for (int iorb = 0; iorb < OrbitalSize; iorb++) + { + SPO_v_avg[iorb] += myfabs(SPO_v[iorb]); + SPO_g_avg[iorb] += myfabs(SPO_g[iorb]); + SPO_l_avg[iorb] += myfabs(SPO_l[iorb]); + output_v << SPO_v[iorb] << " "; + output_g << SPO_g[iorb][0] << " " << SPO_g[iorb][1] << " " << SPO_g[iorb][2] << " "; + output_l << SPO_l[iorb] << " "; + } + output_v << std::endl; + output_g << std::endl; + output_l << std::endl; + } + // restore the whole target. 
+ target.R = R_saved; + target.update(); + OutputReportMaker{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(output_report); + output_v.close(); + output_g.close(); + output_l.close(); + output_report.close(); + } +}; +} // namespace qmcplusplus + +#endif diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp new file mode 100644 index 0000000000..9397bca04f --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetT.cpp @@ -0,0 +1,440 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill,berrillma@ornl.gov, Oak Ridge National Laboratory +// William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#include "SPOSetT.h" + +#include "CPU/SIMD/inner_product.hpp" // simd::dot + +namespace qmcplusplus +{ + +template +SPOSetT::SPOSetT(const std::string& my_name) : my_name_(my_name), OrbitalSetSize(0) +{} + +template +void SPOSetT::extractOptimizableObjectRefs(UniqueOptObjRefsT&) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::extractOptimizableObjectRefs " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::checkOutVariables(const OptVariablesTypeT& active) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::checkOutVariables " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios) +{ + assert(psi.size() == psiinv.size()); + for (int iat = 0; iat < VP.getTotalNum(); ++iat) + { + evaluateValue(VP, iat, psi); + ratios[iat] = simd::dot(psi.data(), psiinv.data(), psi.size()); + } +} + +template +void SPOSetT::mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const +{ + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + { + Vector invRow(const_cast(invRow_ptr_list[iw]), psi_list[iw].get().size()); + spo_list[iw].evaluateDetRatios(vp_list[iw], psi_list[iw], invRow, ratios_list[iw]); + } +} + +template +void SPOSetT::evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin) +{ + throw std::runtime_error("Need specialization of SPOSet::evaluateVGL_spin"); +} + +template +void SPOSetT::mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + spo_list[iw].evaluateVGL(P_list[iw], iat, psi_v_list[iw], dpsi_v_list[iw], d2psi_v_list[iw]); +} + +template +void SPOSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, 
+ const RefVector& psi_v_list) const +{ + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + spo_list[iw].evaluateValue(P_list[iw], iat, psi_v_list[iw]); +} + +template +void SPOSetT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const +{ + throw std::runtime_error(getClassName() + "::mw_evaluateVGLWithSpin() is not supported. \n"); +} + +template +void SPOSetT::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const +{ + assert(this == &spo_list.getLeader()); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); + assert(phi_vgl_v.size(1) == spo_list.size()); + const size_t nw = spo_list.size(); + const size_t norb_requested = phi_vgl_v.size(2); + GradVector dphi_v(norb_requested); + for (int iw = 0; iw < nw; iw++) + { + ValueVector phi_v(phi_vgl_v.data_at(0, iw, 0), norb_requested); + ValueVector d2phi_v(phi_vgl_v.data_at(4, iw, 0), norb_requested); + spo_list[iw].evaluateVGL(P_list[iw], iat, phi_v, dphi_v, d2phi_v); + + ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_v.data(), norb_requested); + grads[iw] = simd::dot(invRow_ptr_list[iw], dphi_v.data(), norb_requested) / ratios[iw]; + + // transpose the array of gradients to SoA in phi_vgl_v + for (size_t idim = 0; idim < DIM; idim++) + { + T* phi_g = phi_vgl_v.data_at(idim + 1, iw, 0); + for (size_t iorb = 0; iorb < norb_requested; iorb++) + phi_g[iorb] = dphi_v[iorb][idim]; + } + } + phi_vgl_v.updateTo(); +} + +template +void SPOSetT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + 
OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const +{ + throw std::runtime_error("Need specialization of " + getClassName() + + "::mw_evaluateVGLandDetRatioGradsWithSpin(). \n"); +} + +template +void SPOSetT::evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& grad_grad_grad_logdet) +{ + throw std::runtime_error("Need specialization of SPOSet::evaluateThirdDeriv(). \n"); +} + +template +void SPOSetT::evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) +{ + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) " + "(vector quantities)\n"); +} + +template +void SPOSetT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const +{ + assert(this == &spo_list.getLeader()); + for (int iw = 0; iw < spo_list.size(); iw++) + spo_list[iw].evaluate_notranspose(P_list[iw], first, last, logdet_list[iw], dlogdet_list[iw], d2logdet_list[iw]); +} + +template +void SPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet) +{ + throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for " + "grad_grad_logdet. \n"); +} + +template +void SPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet) +{ + throw std::runtime_error("Need specialization of SPOSet::evaluate_notranspose() for " + "grad_grad_grad_logdet. 
\n"); +} + +template +std::unique_ptr> SPOSetT::makeClone() const +{ + throw std::runtime_error("Missing SPOSet::makeClone for " + getClassName()); +} + +template +void SPOSetT::basic_report(const std::string& pad) const +{ + app_log() << pad << "size = " << size() << std::endl; + app_log() << pad << "state info:" << std::endl; + // states.report(pad+" "); + app_log().flush(); +} + +template +void SPOSetT::evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi) +{ + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate(P,iat,psi,dpsi,dhpsi) (vector quantities)\n"); +} + +template +void SPOSetT::evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi) +{ + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate(P,iat,psi,dpsi,dhpsi,dghpsi) (vector quantities)\n"); +} + +template +void SPOSetT::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) +{ + if (isRotationSupported()) + throw std::logic_error("Bug!! " + getClassName() + + "::applyRotation " + "must be overloaded when the SPOSet supports rotation."); +} + +template +void SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateDerivatives " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateDerivativesWF " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex) +{ + // Match the fallback in WaveFunctionComponent that evaluates just the + // ratios + evaluateDetRatios(VP, psi, psiinv, ratios); + + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateDerivRatios " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateDerivatives " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl) +{ + if (isOptimizable()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateDerivativesWF " + "must be overloaded when the SPOSet is optimizable."); +} + +template +void SPOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) +{ + if (hasIonDerivs()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateGradSource " + "must be overloaded when the SPOSet has ion derivatives."); +} + +template +void SPOSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi) +{ + if (hasIonDerivs()) + throw std::logic_error("Bug!! " + getClassName() + + "::evaluateGradSource " + "must be overloaded when the SPOSet has ion derivatives."); +} + +template +void SPOSetT::evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& gradphi) +{ + if (hasIonDerivs()) + throw std::logic_error("Bug!! 
" + getClassName() + + "::evaluateGradSourceRow " + "must be overloaded when the SPOSet has ion derivatives."); +} + +template +void SPOSetT::evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) +{ + throw std::runtime_error("Need specialization of " + getClassName() + + "::evaluate_spin(P,iat,psi,dpsi) (vector quantities)\n"); +} + +// Class concrete types from ValueType +template class SPOSetT; +template class SPOSetT; +template class SPOSetT>; +template class SPOSetT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h new file mode 100644 index 0000000000..4db179dc56 --- /dev/null +++ b/src/QMCWaveFunctions/SPOSetT.h @@ -0,0 +1,612 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign +// Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory +// Mark A. Berrill,berrillma@ornl.gov, Oak Ridge National Laboratory +// William F. 
Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPOSETT_H +#define QMCPLUSPLUS_SPOSETT_H + +#include "DualAllocatorAliases.hpp" +#include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "OhmmsPETE/OhmmsArray.h" +#include "OptimizableObjectT.h" +#include "Particle/ParticleSet.h" +#include "Particle/VirtualParticleSetT.h" +#include "QMCWaveFunctions/OrbitalSetTraits.h" + +namespace qmcplusplus +{ +class ResourceCollection; + +template +class SPOSetT; +namespace testing +{ +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT& getMyVars(SPOSetT& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); +OptVariablesTypeT>& getMyVars(SPOSetT>& spo); +} // namespace testing + +/** base class for Single-particle orbital sets + * + * SPOSet stands for S(ingle)P(article)O(rbital)Set which contains + * a number of single-particle orbitals with capabilities of evaluating \f$ + * \psi_j({\bf r}_i)\f$ + */ +template +class SPOSetT : public QMCTraits +{ +public: + using ValueVector = typename OrbitalSetTraits::ValueVector; + using ValueMatrix = typename OrbitalSetTraits::ValueMatrix; + using GradVector = typename OrbitalSetTraits::GradVector; + using GradMatrix = typename OrbitalSetTraits::GradMatrix; + using GradType = TinyVector; + using HessVector = typename OrbitalSetTraits::HessVector; + using HessMatrix = typename OrbitalSetTraits::HessMatrix; + using GGGVector = typename OrbitalSetTraits::GradHessVector; + using GGGMatrix = typename OrbitalSetTraits::GradHessMatrix; + using SPOMap = std::map>>; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + using OffloadMWVArray = Array>; // [walker, Orbs] + using PosType = typename OrbitalSetTraits::PosType; + using RealType = typename OrbitalSetTraits::RealType; + using ComplexType = typename 
OrbitalSetTraits::ComplexType; + using ValueType = typename OrbitalSetTraits::ValueType; + using FullRealType = typename OrbitalSetTraits::RealType; + using FullValueType = typename OrbitalSetTraits::FullValueType; + ; + template + using OffloadMatrix = Matrix>; + + /** constructor */ + SPOSetT(const std::string& my_name); + + /** destructor + * + * Derived class destructor needs to pay extra attention to freeing memory + * shared among clones of SPOSet. + */ + virtual ~SPOSetT() = default; + + /** return the size of the orbital set + * Ye: this needs to be replaced by getOrbitalSetSize(); + */ + inline int size() const { return OrbitalSetSize; } + + /** print basic SPOSet information + */ + void basic_report(const std::string& pad = "") const; + + /** print SPOSet information + */ + virtual void report(const std::string& pad = "") const { basic_report(pad); } + + /** return the size of the orbitals + */ + inline int getOrbitalSetSize() const { return OrbitalSetSize; } + + /// Query if this SPOSet is optimizable + virtual bool isOptimizable() const { return false; } + + /** extract underlying OptimizableObject references + * @param opt_obj_refs aggregated list of optimizable object references + */ + virtual void extractOptimizableObjectRefs(UniqueOptObjRefsT& opt_obj_refs); + + /** check out variational optimizable variables + * @param active a super set of optimizable variables + */ + virtual void checkOutVariables(const OptVariablesTypeT& active); + + /// Query if this SPOSet uses OpenMP offload + virtual bool isOMPoffload() const { return false; } + + /** Query if this SPOSet has an explicit ion dependence. returns true if it + * does. 
+ */ + virtual bool hasIonDerivs() const { return false; } + + /// check a few key parameters before putting the SPO into a determinant + virtual void checkObject() const {} + + /// return true if this SPOSet can be wrappered by RotatedSPO + virtual bool isRotationSupported() const { return false; } + /// store parameters before getting destroyed by rotation. + virtual void storeParamsBeforeRotation() {} + /// apply rotation to all the orbitals + virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false); + + /// Parameter derivatives of the wavefunction and the Laplacian of the + /// wavefunction + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const int& FirstIndex, + const int& LastIndex); + + /// Parameter derivatives of the wavefunction + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + int FirstIndex, + int LastIndex); + + /** Evaluate the derivative of the optimized orbitals with respect to the + * parameters this is used only for MSD, to be refined for better serving + * both single and multi SD + */ + virtual void evaluateDerivatives(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + Vector& dhpsioverpsi, + const T& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const GradMatrix& grads_up, + const GradMatrix& grads_dn, + const ValueMatrix& lapls_up, + const ValueMatrix& lapls_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const GradMatrix& B_grad, + const ValueMatrix& B_lapl, + const std::vector& detData_up, + const size_t N1, + const size_t N2, + const size_t NP1, + const size_t NP2, + const std::vector>& lookup_tbl); + + /** Evaluate the derivative of the optimized 
orbitals with respect to the + * parameters this is used only for MSD, to be refined for better serving + * both single and multi SD + */ + virtual void evaluateDerivativesWF(ParticleSetT& P, + const OptVariablesTypeT& optvars, + Vector& dlogpsi, + const FullValueType& psiCurrent, + const std::vector& Coeff, + const std::vector& C2node_up, + const std::vector& C2node_dn, + const ValueVector& detValues_up, + const ValueVector& detValues_dn, + const ValueMatrix& M_up, + const ValueMatrix& M_dn, + const ValueMatrix& Minv_up, + const ValueMatrix& Minv_dn, + const std::vector& detData_up, + const std::vector>& lookup_tbl); + + /** set the OrbitalSetSize + * @param norbs number of single-particle orbitals + * Ye: I prefer to remove this interface in the future. SPOSet builders need + * to handle the size correctly. It doesn't make sense allowing to set the + * value at any place in the code. + * @TODO make it purely virtual + */ + virtual void setOrbitalSetSize(int norbs){}; + + /** evaluate the values of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @TODO make it purely virtual + */ + virtual void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi){}; + + /** evaluate determinant ratios for virtual moves, e.g., sphere move for + * nonlocalPP + * @param VP virtual particle set + * @param psi values of the SPO, used as a scratch space if needed + * @param psiinv the row of inverse slater matrix corresponding to the + * particle moved virtually + * @param ratios return determinant ratios + */ + virtual void evaluateDetRatios(const VirtualParticleSetT& VP, + ValueVector& psi, + const ValueVector& psiinv, + std::vector& ratios); + + /// Determinant ratios and parameter derivatives of the wavefunction for + /// virtual moves + virtual void evaluateDerivRatios(const VirtualParticleSetT& VP, + const OptVariablesTypeT& optvars, + ValueVector& psi, + const ValueVector& psiinv, + 
std::vector& ratios, + Matrix& dratios, + int FirstIndex, + int LastIndex); + + /** evaluate determinant ratios for virtual moves, e.g., sphere move for + * nonlocalPP, of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param vp_list a list of virtual particle sets in a walker batch + * @param psi_list a list of values of the SPO, used as a scratch space if + * needed + * @param invRow_ptr_list a list of pointers to the rows of inverse slater + * matrix corresponding to the particles moved virtually + * @param ratios_list a list of returning determinant ratios + */ + virtual void mw_evaluateDetRatios(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + const RefVector& psi_list, + const std::vector& invRow_ptr_list, + std::vector>& ratios_list) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @TODO make this purely virtual + */ + virtual void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){}; + + /** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @param dspin spin gradients of the SPO + */ + virtual void evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin); + + /** evaluate the values this single-particle orbital sets of multiple + * walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param 
psi_v_list the list of value vector pointers in a walker batch + */ + virtual void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + */ + virtual void mw_evaluateVGL(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list) const; + + /** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + * @param mw_dspin is a dual matrix of spin gradients [nw][norb] + * Note that the device side of mw_dspin is up to date + */ + virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant 
ratio and grads of multiple walkers. Device + * data of phi_vgl_v must be up-to-date upon return + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param psi_ratio_grads_v determinant ratio and grads of all the walkers + */ + virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads) const; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant ratio and grads of multiple walkers. Device + * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param ratios, ratios of all walkers + * @param grads, spatial gradients of all walkers + * @param spingrads, spin gradients of all walkers + */ + virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const; + + /** evaluate the values, gradients and hessians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param grad_grad_psi hessians of the SPO + */ + virtual void evaluateVGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& 
grad_grad_psi); + + /** evaluate the values, gradients, hessians, and grad hessians of this + * single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param grad_grad_psi hessians of the SPO + * @param grad_grad_grad_psi grad hessians of the SPO + */ + virtual void evaluateVGHGH(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + HessVector& grad_grad_psi, + GGGVector& grad_grad_grad_psi); + + /** evaluate the values of this single-particle orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + */ + virtual void evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi); + + /** evaluate the third derivatives of this single-particle orbital set + * @param P current ParticleSet + * @param first first particle + * @param last last particle + * @param grad_grad_grad_logdet third derivatives of the SPO + */ + virtual void evaluateThirdDeriv(const ParticleSetT& P, int first, int last, GGGMatrix& grad_grad_grad_logdet); + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles + * @param[in] P current ParticleSet + * @param[in] first starting index of the particles + * @param[in] last ending index of the particles + * @param[out] logdet determinant matrix to be inverted + * @param[out] dlogdet gradients + * @param[out] d2logdet laplacians + * @TODO make this pure virtual + */ + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet){}; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles, including the spin gradient + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + 
* @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param d2logdet laplacians + * @param dspinlogdet, spin gradients + * + * default implementation will abort for all SPOSets except SpinorSet + * + */ + virtual void evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet); + + virtual void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const; + + /** evaluate the values, gradients and hessians of this single-particle + * orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param grad_grad_logdet hessians + * + */ + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet); + + /** evaluate the values, gradients, hessians and third derivatives of this + * single-particle orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param grad_grad_logdet hessians + * @param grad_grad_grad_logdet third derivatives + * + */ + virtual void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + HessMatrix& grad_grad_logdet, + GGGMatrix& grad_grad_grad_logdet); + + /** evaluate the gradients of this single-particle orbital + * for [first,last) target particles with respect to the given source + * particle + * @param 
P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients + * + */ + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi); + + /** evaluate the gradients of values, gradients, laplacians of this + * single-particle orbital for [first,last) target particles with respect to + * the given source particle + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients of values + * @param grad_grad_phi gradients of gradients + * @param grad_lapl_phi gradients of laplacians + * + */ + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& grad_phi, + HessMatrix& grad_grad_phi, + GradMatrix& grad_lapl_phi); + + /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r. + * + * @param[in] P particle set. + * @param[in] iel The electron at which to evaluate phi(r_iel) + * @param[in] source ion particle set. + * @param[in] iat_src ion ID w.r.t. which to take derivative. + * @param[in,out] gradphi Vector of d/dR_iat phi_j(r). 
+ * @return Void + */ + virtual void evaluateGradSourceRow(const ParticleSetT& P, + int iel, + const ParticleSetT& source, + int iat_src, + GradVector& gradphi); + + /** access the k point related to the given orbital */ + virtual PosType get_k(int orb) { return PosType(); } + + /** initialize a shared resource and hand it to collection + */ + virtual void createResource(ResourceCollection& collection) const {} + + /** acquire a shared resource from collection + */ + virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} + + /** return a shared resource to collection + */ + virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const {} + + /** make a clone of itself + * every derived class must implement this to have threading working + * correctly. + */ + [[noreturn]] virtual std::unique_ptr> makeClone() const; + + /** Used only by cusp correction in AOS LCAO. + * Ye: the SoA LCAO moves all this responsibility to the builder. + * This interface should be removed with AoS. + */ + virtual bool transformSPOSet() { return true; } + + /** finalize the construction of SPOSet + * + * for example, classes serving accelerators may need to transfer data from + * host to device after the host side objects are built. 
+ */ + virtual void finalizeConstruction() {} + + /// return object name + const std::string& getName() const { return my_name_; } + + /// @TODO make this purely virutal return class name + virtual std::string getClassName() const { return ""; }; + +protected: + /// name of the object, unique identifier + const std::string my_name_; + /// number of Single-particle orbitals + IndexType OrbitalSetSize; + /// Optimizable variables + OptVariablesTypeT myVars; + + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT& testing::getMyVars(SPOSetT& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); + friend OptVariablesTypeT>& testing::getMyVars(SPOSetT>& spo); +}; + +template +using SPOSetTPtr = SPOSetT*; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/SpinorSet.h b/src/QMCWaveFunctions/SpinorSet.h index 63ebf5c688..91a22a4320 100644 --- a/src/QMCWaveFunctions/SpinorSet.h +++ b/src/QMCWaveFunctions/SpinorSet.h @@ -13,204 +13,12 @@ #ifndef QMCPLUSPLUS_SPINORSET_H #define QMCPLUSPLUS_SPINORSET_H -#include "QMCWaveFunctions/SPOSet.h" -#include +#include "Configuration.h" +#include "QMCWaveFunctions/SpinorSetT.h" namespace qmcplusplus { -/** Class for Melton & Mitas style Spinors. - * - */ -class SpinorSet : public SPOSet -{ -public: - /** constructor */ - SpinorSet(const std::string& my_name); - ~SpinorSet() override; - - std::string getClassName() const override { return "SpinorSet"; } - bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); } - bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); } - bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); } - - //This class is initialized by separately building the up and down channels of the spinor set and - //then registering them. 
- void set_spos(std::unique_ptr&& up, std::unique_ptr&& dn); - - /** set the OrbitalSetSize - * @param norbs number of single-particle orbitals - */ - void setOrbitalSetSize(int norbs) override; - - /** evaluate the values of this spinor set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - */ - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - */ - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital set - * @param P current ParticleSet - * @param iat active particle - * @param psi values of the SPO - * @param dpsi gradients of the SPO - * @param d2psi laplacians of the SPO - * @param dspin spin gradient of the SPO - */ - void evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) override; - - /** evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param psi_v_list the list of value vector pointers in a walker batch - * @param dpsi_v_list the list of gradient vector pointers in a walker batch - * @param d2psi_v_list the list of laplacian vector pointers in a walker batch - * @param mw_dspin dual matrix of spin gradients. 
nw x num_orbitals - */ - void mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const override; - - - /** evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio - * and grads of multiple walkers. Device data of phi_vgl_v must be up-to-date upon return. - * Includes spin gradients - * @param spo_list the list of SPOSet pointers in a walker batch - * @param P_list the list of ParticleSet pointers in a walker batch - * @param iat active particle - * @param phi_vgl_v orbital values, gradients and laplacians of all the walkers - * @param ratios, ratios of all walkers - * @param grads, spatial gradients of all walkers - * @param spingrads, spin gradients of all walkers - */ - void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const override; - - /** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param logdet determinant matrix to be inverted - * @param dlogdet gradients - * @param d2logdet laplacians - * - */ - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - - void mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const override; - - void 
evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) override; - /** Evaluate the values, spin gradients, and spin laplacians of single particle spinors corresponding to electron iat. - * @param P current particle set. - * @param iat electron index. - * @param spinor values. - * @param spin gradient values. d/ds phi(r,s). - * - */ - void evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) override; - - /** evaluate the gradients of this single-particle orbital - * for [first,last) target particles with respect to the given source particle - * @param P current ParticleSet - * @param first starting index of the particles - * @param last ending index of the particles - * @param iat_src source particle index - * @param gradphi gradients - * - */ - virtual void evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) override; - - std::unique_ptr makeClone() const override; - - void createResource(ResourceCollection& collection) const override; - - void acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - void releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const override; - - /// check if the multi walker resource is owned. For testing only. - bool isResourceOwned() const { return bool(mw_res_handle_); } - -private: - struct SpinorSetMultiWalkerResource; - ResourceHandle mw_res_handle_; - - std::pair, RefVectorWithLeader> extractSpinComponentRefList( - const RefVectorWithLeader& spo_list) const; - - //Sposet for the up and down channels of our spinors. - std::unique_ptr spo_up; - std::unique_ptr spo_dn; - - //temporary arrays for holding the values of the up and down channels respectively. 
- ValueVector psi_work_up; - ValueVector psi_work_down; - - //temporary arrays for holding the gradients of the up and down channels respectively. - GradVector dpsi_work_up; - GradVector dpsi_work_down; - - //temporary arrays for holding the laplacians of the up and down channels respectively. - ValueVector d2psi_work_up; - ValueVector d2psi_work_down; - - //Same as above, but these are the full matrices containing all spinor/particle combinations. - ValueMatrix logpsi_work_up; - ValueMatrix logpsi_work_down; - - GradMatrix dlogpsi_work_up; - GradMatrix dlogpsi_work_down; - - ValueMatrix d2logpsi_work_up; - ValueMatrix d2logpsi_work_down; -}; +using SpinorSet = SpinorSetT; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/SpinorSet.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp similarity index 55% rename from src/QMCWaveFunctions/SpinorSet.cpp rename to src/QMCWaveFunctions/SpinorSetT.cpp index 4f0531659b..e1fe6fd374 100644 --- a/src/QMCWaveFunctions/SpinorSet.cpp +++ b/src/QMCWaveFunctions/SpinorSetT.cpp @@ -10,54 +10,66 @@ // File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories ////////////////////////////////////////////////////////////////////////////////////// -#include "SpinorSet.h" -#include "Utilities/ResourceCollection.h" +#include "SpinorSetT.h" + #include "Platforms/OMPTarget/OMPTargetMath.hpp" +#include "Utilities/ResourceCollection.h" namespace qmcplusplus { -struct SpinorSet::SpinorSetMultiWalkerResource : public Resource +template +struct SpinorSetT::SpinorSetMultiWalkerResource : public Resource { SpinorSetMultiWalkerResource() : Resource("SpinorSet") {} SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {} std::unique_ptr makeClone() const override { return std::make_unique(*this); } OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v; - std::vector up_ratios, dn_ratios; + std::vector up_ratios, dn_ratios; std::vector up_grads, dn_grads; std::vector spins; }; 
-SpinorSet::SpinorSet(const std::string& my_name) : SPOSet(my_name), spo_up(nullptr), spo_dn(nullptr) {} -SpinorSet::~SpinorSet() = default; +template +SpinorSetT::SpinorSetT(const std::string& my_name) : SPOSetT(my_name), spo_up(nullptr), spo_dn(nullptr) +{} -void SpinorSet::set_spos(std::unique_ptr&& up, std::unique_ptr&& dn) +template +SpinorSetT::~SpinorSetT() = default; + +template +void SpinorSetT::set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn) { - //Sanity check for input SPO's. They need to be the same size or + // Sanity check for input SPO's. They need to be the same size or IndexType spo_size_up = up->getOrbitalSetSize(); IndexType spo_size_down = dn->getOrbitalSetSize(); if (spo_size_up != spo_size_down) - throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO components have different sizes."); + throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO " + "components have different sizes."); setOrbitalSetSize(spo_size_up); spo_up = std::move(up); spo_dn = std::move(dn); - psi_work_up.resize(OrbitalSetSize); - psi_work_down.resize(OrbitalSetSize); + psi_work_up.resize(this->OrbitalSetSize); + psi_work_down.resize(this->OrbitalSetSize); - dpsi_work_up.resize(OrbitalSetSize); - dpsi_work_down.resize(OrbitalSetSize); + dpsi_work_up.resize(this->OrbitalSetSize); + dpsi_work_down.resize(this->OrbitalSetSize); - d2psi_work_up.resize(OrbitalSetSize); - d2psi_work_down.resize(OrbitalSetSize); + d2psi_work_up.resize(this->OrbitalSetSize); + d2psi_work_down.resize(this->OrbitalSetSize); } -void SpinorSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; }; - +template +void SpinorSetT::setOrbitalSetSize(int norbs) +{ + this->OrbitalSetSize = norbs; +}; -void SpinorSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) +template +void SpinorSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) { psi_work_up = 0.0; psi_work_down = 0.0; @@ -65,21 +77,27 @@ void SpinorSet::evaluateValue(const 
ParticleSet& P, int iat, ValueVector& psi) spo_up->evaluateValue(P, iat, psi_work_up); spo_dn->evaluateValue(P, iat, psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - //This is only supported in the complex build, so ValueType is some complex number depending on the precision. - ValueType eis(coss, sins); - ValueType emis(coss, -sins); + // This is only supported in the complex build, so T is some complex number + // depending on the precision. + T eis(coss, sins); + T emis(coss, -sins); psi = eis * psi_work_up + emis * psi_work_down; } -void SpinorSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +template +void SpinorSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) { psi_work_up = 0.0; psi_work_down = 0.0; @@ -91,27 +109,28 @@ void SpinorSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, Gra spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); + T eis(coss, sins); + T emis(coss, -sins); psi = eis * psi_work_up + emis * psi_work_down; dpsi = eis * dpsi_work_up + emis * dpsi_work_down; d2psi = eis * d2psi_work_up + emis * d2psi_work_down; } -void SpinorSet::evaluateVGL_spin(const ParticleSet& P, - int iat, - ValueVector& psi, - GradVector& dpsi, - ValueVector& d2psi, - ValueVector& dspin) +template +void SpinorSetT::evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin) { psi_work_up = 0.0; 
psi_work_down = 0.0; @@ -123,16 +142,16 @@ void SpinorSet::evaluateVGL_spin(const ParticleSet& P, spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up); spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); psi = eis * psi_work_up + emis * psi_work_down; dpsi = eis * dpsi_work_up + emis * dpsi_work_down; @@ -140,15 +159,16 @@ void SpinorSet::evaluateVGL_spin(const ParticleSet& P, dspin = eye * (eis * psi_work_up - emis * psi_work_down); } -void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list, - const RefVector& dpsi_v_list, - const RefVector& d2psi_v_list, - OffloadMatrix& mw_dspin) const +template +void SpinorSetT::mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const { - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader>(); auto& P_leader = P_list.getLeader(); assert(this == &spo_leader); @@ -162,7 +182,7 @@ void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_li RefVector up_d2psi_v_list, dn_d2psi_v_list; for (int iw = 0; iw < nw; iw++) { - auto& spo = spo_list.getCastedElement(iw); + auto& spo = spo_list.template getCastedElement>(iw); up_psi_v_list.push_back(spo.psi_work_up); dn_psi_v_list.push_back(spo.psi_work_down); up_dpsi_v_list.push_back(spo.dpsi_work_up); @@ -176,37 +196,39 @@ void SpinorSet::mw_evaluateVGLWithSpin(const RefVectorWithLeader& spo_li for 
(int iw = 0; iw < nw; iw++) { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); psi_v_list[iw].get() = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get(); dpsi_v_list[iw].get() = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get(); d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get(); - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]); } - //Data above is all on host, but since mw_dspin is DualMatrix we need to sync the host and device + // Data above is all on host, but since mw_dspin is DualMatrix we need to + // sync the host and device mw_dspin.updateTo(); } -void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const std::vector& invRow_ptr_list, - OffloadMWVGLArray& phi_vgl_v, - std::vector& ratios, - std::vector& grads, - std::vector& spingrads) const +template +void SpinorSetT::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const { - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader>(); auto& P_leader = P_list.getLeader(); assert(this == &spo_leader); - assert(phi_vgl_v.size(0) == DIM_VGL); + assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); 
assert(phi_vgl_v.size(1) == spo_list.size()); const size_t nw = spo_list.size(); const size_t norb_requested = phi_vgl_v.size(2); @@ -220,8 +242,8 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader auto& dn_grads = mw_res.dn_grads; auto& spins = mw_res.spins; - up_phi_vgl_v.resize(DIM_VGL, nw, norb_requested); - dn_phi_vgl_v.resize(DIM_VGL, nw, norb_requested); + up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); + dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested); up_ratios.resize(nw); dn_ratios.resize(nw); up_grads.resize(nw); @@ -238,14 +260,14 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader dn_grads); for (int iw = 0; iw < nw; iw++) { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - spins[iw] = s; - RealType coss = std::cos(s); - RealType sins = std::sin(s); + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + spins[iw] = s; + RealType coss = std::cos(s); + RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw]; grads[iw] = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw]; @@ -253,7 +275,7 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader } auto* spins_ptr = spins.data(); - //This data lives on the device + // This data lives on the device auto* phi_vgl_ptr = phi_vgl_v.data(); auto* up_phi_vgl_ptr = up_phi_vgl_v.data(); auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data(); @@ -262,9 +284,9 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader { RealType c, s; omptarget::sincos(spins_ptr[iw], &s, &c); - ValueType eis(c, s), emis(c, -s); + T eis(c, s), emis(c, -s); PRAGMA_OFFLOAD("omp parallel for collapse(2)") - for (int idim = 0; idim < DIM_VGL; idim++) + for (int idim = 0; idim < 
QMCTraits::DIM_VGL; idim++) for (int iorb = 0; iorb < norb_requested; iorb++) { auto offset = idim * nw * norb_requested + iw * norb_requested + iorb; @@ -273,41 +295,41 @@ void SpinorSet::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader } } -void SpinorSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) +template +void SpinorSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) { IndexType nelec = P.getTotalNum(); - logpsi_work_up.resize(nelec, OrbitalSetSize); - logpsi_work_down.resize(nelec, OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_up.resize(nelec, OrbitalSetSize); - dlogpsi_work_down.resize(nelec, OrbitalSetSize); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_up.resize(nelec, OrbitalSetSize); - d2logpsi_work_down.resize(nelec, OrbitalSetSize); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - for (int iat = 0; iat < nelec; iat++) { - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); + T eis(coss, sins); + T emis(coss, -sins); - for (int no = 0; no < OrbitalSetSize; no++) + for (int no = 0; no < this->OrbitalSetSize; no++) { logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); dlogdet(iat, no) = eis * 
dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); @@ -316,15 +338,16 @@ void SpinorSet::evaluate_notranspose(const ParticleSet& P, } } -void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int first, - int last, - const RefVector& logdet_list, - const RefVector& dlogdet_list, - const RefVector& d2logdet_list) const +template +void SpinorSetT::mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const { - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader>(); auto& P_leader = P_list.getLeader(); assert(this == &spo_leader); @@ -355,8 +378,8 @@ void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_l up_d2logdet_list.reserve(nw); dn_d2logdet_list.reserve(nw); - ValueMatrix tmp_val_mat(nelec, OrbitalSetSize); - GradMatrix tmp_grad_mat(nelec, OrbitalSetSize); + ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize); + GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize); for (int iw = 0; iw < nw; iw++) { mw_up_logdet.emplace_back(tmp_val_mat); @@ -383,13 +406,13 @@ void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_l for (int iw = 0; iw < nw; iw++) for (int iat = 0; iat < nelec; iat++) { - ParticleSet::Scalar_t s = P_list[iw].activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); + typename ParticleSetT::Scalar_t s = P_list[iw].activeSpin(iat); + RealType coss = std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); - for (int no = 0; no < OrbitalSetSize; no++) + for (int no = 0; no < this->OrbitalSetSize; no++) { logdet_list[iw].get()(iat, no) = eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no); 
@@ -401,43 +424,43 @@ void SpinorSet::mw_evaluate_notranspose(const RefVectorWithLeader& spo_l } } -void SpinorSet::evaluate_notranspose_spin(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet, - ValueMatrix& dspinlogdet) +template +void SpinorSetT::evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) { IndexType nelec = P.getTotalNum(); - logpsi_work_up.resize(nelec, OrbitalSetSize); - logpsi_work_down.resize(nelec, OrbitalSetSize); + logpsi_work_up.resize(nelec, this->OrbitalSetSize); + logpsi_work_down.resize(nelec, this->OrbitalSetSize); - dlogpsi_work_up.resize(nelec, OrbitalSetSize); - dlogpsi_work_down.resize(nelec, OrbitalSetSize); + dlogpsi_work_up.resize(nelec, this->OrbitalSetSize); + dlogpsi_work_down.resize(nelec, this->OrbitalSetSize); - d2logpsi_work_up.resize(nelec, OrbitalSetSize); - d2logpsi_work_down.resize(nelec, OrbitalSetSize); + d2logpsi_work_up.resize(nelec, this->OrbitalSetSize); + d2logpsi_work_down.resize(nelec, this->OrbitalSetSize); spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up); spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down); - for (int iat = 0; iat < nelec; iat++) { - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); - for (int no = 0; no < OrbitalSetSize; no++) + for (int no = 0; no < this->OrbitalSetSize; no++) { logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no); dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no); @@ 
-447,8 +470,8 @@ void SpinorSet::evaluate_notranspose_spin(const ParticleSet& P, } } - -void SpinorSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, ValueVector& dpsi) +template +void SpinorSetT::evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) { psi_work_up = 0.0; psi_work_down = 0.0; @@ -456,67 +479,72 @@ void SpinorSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector& psi, V spo_up->evaluateValue(P, iat, psi_work_up); spo_dn->evaluateValue(P, iat, psi_work_down); - ParticleSet::Scalar_t s = P.activeSpin(iat); + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); RealType coss(0.0), sins(0.0); coss = std::cos(s); sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - ValueType eye(0, 1.0); + T eis(coss, sins); + T emis(coss, -sins); + T eye(0, 1.0); psi = eis * psi_work_up + emis * psi_work_down; dpsi = eye * (eis * psi_work_up - emis * psi_work_down); } -void SpinorSet::evaluateGradSource(const ParticleSet& P, - int first, - int last, - const ParticleSet& source, - int iat_src, - GradMatrix& gradphi) +template +void SpinorSetT::evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) { IndexType nelec = P.getTotalNum(); - GradMatrix gradphi_up(nelec, OrbitalSetSize); - GradMatrix gradphi_dn(nelec, OrbitalSetSize); + GradMatrix gradphi_up(nelec, this->OrbitalSetSize); + GradMatrix gradphi_dn(nelec, this->OrbitalSetSize); spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up); spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn); for (int iat = 0; iat < nelec; iat++) { - ParticleSet::Scalar_t s = P.activeSpin(iat); - RealType coss = std::cos(s); - RealType sins = std::sin(s); - ValueType eis(coss, sins); - ValueType emis(coss, -sins); - for (int imo = 0; imo < OrbitalSetSize; imo++) + typename ParticleSetT::Scalar_t s = P.activeSpin(iat); + RealType coss = 
std::cos(s); + RealType sins = std::sin(s); + T eis(coss, sins); + T emis(coss, -sins); + for (int imo = 0; imo < this->OrbitalSetSize; imo++) gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis; } } -std::unique_ptr SpinorSet::makeClone() const +template +std::unique_ptr> SpinorSetT::makeClone() const { - auto myclone = std::make_unique(my_name_); - std::unique_ptr cloneup(spo_up->makeClone()); - std::unique_ptr clonedn(spo_dn->makeClone()); + auto myclone = std::make_unique>(this->my_name_); + std::unique_ptr> cloneup(spo_up->makeClone()); + std::unique_ptr> clonedn(spo_dn->makeClone()); myclone->set_spos(std::move(cloneup), std::move(clonedn)); return myclone; } -void SpinorSet::createResource(ResourceCollection& collection) const +template +void SpinorSetT::createResource(ResourceCollection& collection) const { spo_up->createResource(collection); spo_dn->createResource(collection); auto index = collection.addResource(std::make_unique()); } -void SpinorSet::acquireResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const +template +void SpinorSetT::acquireResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader>(); auto& up_spo_leader = up_spo_list.getLeader(); auto& dn_spo_leader = dn_spo_list.getLeader(); up_spo_leader.acquireResource(collection, up_spo_list); @@ -524,10 +552,12 @@ void SpinorSet::acquireResource(ResourceCollection& collection, const RefVectorW spo_leader.mw_res_handle_ = collection.lendResource(); } -void SpinorSet::releaseResource(ResourceCollection& collection, const RefVectorWithLeader& spo_list) const +template +void SpinorSetT::releaseResource(ResourceCollection& collection, + const RefVectorWithLeader>& spo_list) const { auto [up_spo_list, dn_spo_list] = 
extractSpinComponentRefList(spo_list); - auto& spo_leader = spo_list.getCastedLeader(); + auto& spo_leader = spo_list.template getCastedLeader>(); auto& up_spo_leader = up_spo_list.getLeader(); auto& dn_spo_leader = dn_spo_list.getLeader(); up_spo_leader.releaseResource(collection, up_spo_list); @@ -535,24 +565,28 @@ void SpinorSet::releaseResource(ResourceCollection& collection, const RefVectorW collection.takebackResource(spo_leader.mw_res_handle_); } -std::pair, RefVectorWithLeader> SpinorSet::extractSpinComponentRefList( - const RefVectorWithLeader& spo_list) const +template +std::pair>, RefVectorWithLeader>> SpinorSetT::extractSpinComponentRefList( + const RefVectorWithLeader>& spo_list) const { - auto& spo_leader = spo_list.getCastedLeader(); - IndexType nw = spo_list.size(); - SPOSet& up_spo_leader = *(spo_leader.spo_up); - SPOSet& dn_spo_leader = *(spo_leader.spo_dn); - RefVectorWithLeader up_spo_list(up_spo_leader); - RefVectorWithLeader dn_spo_list(dn_spo_leader); + SpinorSetT& spo_leader = spo_list.template getCastedLeader>(); + IndexType nw = spo_list.size(); + SPOSetT& up_spo_leader = *(spo_leader.spo_up); + SPOSetT& dn_spo_leader = *(spo_leader.spo_dn); + RefVectorWithLeader> up_spo_list(up_spo_leader); + RefVectorWithLeader> dn_spo_list(dn_spo_leader); up_spo_list.reserve(nw); dn_spo_list.reserve(nw); for (int iw = 0; iw < nw; iw++) { - SpinorSet& spinor = spo_list.getCastedElement(iw); + SpinorSetT& spinor = spo_list.template getCastedElement>(iw); up_spo_list.emplace_back(*(spinor.spo_up)); dn_spo_list.emplace_back(*(spinor.spo_dn)); } return std::make_pair(up_spo_list, dn_spo_list); } +template class SpinorSetT>; +template class SpinorSetT>; + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h new file mode 100644 index 0000000000..25c1cc956f --- /dev/null +++ b/src/QMCWaveFunctions/SpinorSetT.h @@ -0,0 +1,242 @@ 
+////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers +// +// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +// Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPINORSETT_H +#define QMCPLUSPLUS_SPINORSETT_H + +#include "QMCWaveFunctions/SPOSetT.h" +#include "ResourceHandle.h" + +namespace qmcplusplus +{ +/** Class for Melton & Mitas style Spinors. + * + */ +template +class SpinorSetT : public SPOSetT +{ +public: + using ValueMatrix = typename SPOSetT::ValueMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + using GradVector = typename SPOSetT::GradVector; + using OffloadMWVGLArray = Array>; // [VGL, walker, Orbs] + // using OffloadMWVGLArray = typename SPOSetT::template + // OffloadMWCGLArray; + template + using OffloadMatrix = typename SPOSetT::template OffloadMatrix
; + using RealType = typename SPOSetT::RealType; + using ComplexType = typename SPOSetT::ComplexType; + using IndexType = OHMMS_INDEXTYPE; + + /** constructor */ + SpinorSetT(const std::string& my_name); + ~SpinorSetT() override; + + std::string getClassName() const override { return "SpinorSet"; } + bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); } + bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); } + bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); } + + // This class is initialized by separately building the up and down channels + // of the spinor set and then registering them. + void set_spos(std::unique_ptr>&& up, std::unique_ptr>&& dn); + + /** set the OrbitalSetSize + * @param norbs number of single-particle orbitals + */ + void setOrbitalSetSize(int norbs) override; + + /** evaluate the values of this spinor set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + */ + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + */ + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital set + * @param P current ParticleSet + * @param iat active particle + * @param psi values of the SPO + * @param dpsi gradients of the SPO + * @param d2psi laplacians of the SPO + * @param dspin spin gradient of the SPO + */ + void evaluateVGL_spin(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi, + ValueVector& dspin) 
override; + + /** evaluate the values, gradients and laplacians and spin gradient of this + * single-particle orbital sets of multiple walkers + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param psi_v_list the list of value vector pointers in a walker batch + * @param dpsi_v_list the list of gradient vector pointers in a walker batch + * @param d2psi_v_list the list of laplacian vector pointers in a walker + * batch + * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals + */ + void mw_evaluateVGLWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list, + const RefVector& dpsi_v_list, + const RefVector& d2psi_v_list, + OffloadMatrix& mw_dspin) const override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital sets and determinant ratio and grads of multiple walkers. Device + * data of phi_vgl_v must be up-to-date upon return. 
Includes spin gradients + * @param spo_list the list of SPOSet pointers in a walker batch + * @param P_list the list of ParticleSet pointers in a walker batch + * @param iat active particle + * @param phi_vgl_v orbital values, gradients and laplacians of all the + * walkers + * @param ratios, ratios of all walkers + * @param grads, spatial gradients of all walkers + * @param spingrads, spin gradients of all walkers + */ + void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const std::vector& invRow_ptr_list, + OffloadMWVGLArray& phi_vgl_v, + std::vector& ratios, + std::vector& grads, + std::vector& spingrads) const override; + + /** evaluate the values, gradients and laplacians of this single-particle + * orbital for [first,last) particles + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param logdet determinant matrix to be inverted + * @param dlogdet gradients + * @param d2logdet laplacians + * + */ + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; + + void mw_evaluate_notranspose(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int first, + int last, + const RefVector& logdet_list, + const RefVector& dlogdet_list, + const RefVector& d2logdet_list) const override; + + void evaluate_notranspose_spin(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet, + ValueMatrix& dspinlogdet) override; + /** Evaluate the values, spin gradients, and spin laplacians of single + * particle spinors corresponding to electron iat. + * @param P current particle set. + * @param iat electron index. + * @param spinor values. + * @param spin gradient values. d/ds phi(r,s). 
+ * + */ + void evaluate_spin(const ParticleSetT& P, int iat, ValueVector& psi, ValueVector& dpsi) override; + + /** evaluate the gradients of this single-particle orbital + * for [first,last) target particles with respect to the given source + * particle + * @param P current ParticleSet + * @param first starting index of the particles + * @param last ending index of the particles + * @param iat_src source particle index + * @param gradphi gradients + * + */ + virtual void evaluateGradSource(const ParticleSetT& P, + int first, + int last, + const ParticleSetT& source, + int iat_src, + GradMatrix& gradphi) override; + + std::unique_ptr> makeClone() const override; + + void createResource(ResourceCollection& collection) const override; + + void acquireResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; + + void releaseResource(ResourceCollection& collection, const RefVectorWithLeader>& spo_list) const override; + + /// check if the multi walker resource is owned. For testing only. + bool isResourceOwned() const { return bool(mw_res_handle_); } + +private: + struct SpinorSetMultiWalkerResource; + ResourceHandle mw_res_handle_; + + std::pair>, RefVectorWithLeader>> extractSpinComponentRefList( + const RefVectorWithLeader>& spo_list) const; + + // Sposet for the up and down channels of our spinors. + std::unique_ptr> spo_up; + std::unique_ptr> spo_dn; + + // temporary arrays for holding the values of the up and down channels + // respectively. + ValueVector psi_work_up; + ValueVector psi_work_down; + + // temporary arrays for holding the gradients of the up and down channels + // respectively. + GradVector dpsi_work_up; + GradVector dpsi_work_down; + + // temporary arrays for holding the laplacians of the up and down channels + // respectively. + ValueVector d2psi_work_up; + ValueVector d2psi_work_down; + + // Same as above, but these are the full matrices containing all + // spinor/particle combinations. 
+ ValueMatrix logpsi_work_up; + ValueMatrix logpsi_work_down; + + GradMatrix dlogpsi_work_up; + GradMatrix dlogpsi_work_down; + + ValueMatrix d2logpsi_work_up; + ValueMatrix d2logpsi_work_down; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/TrialWaveFunction.h b/src/QMCWaveFunctions/TrialWaveFunction.h index b59b483f8d..6a5e166e75 100644 --- a/src/QMCWaveFunctions/TrialWaveFunction.h +++ b/src/QMCWaveFunctions/TrialWaveFunction.h @@ -31,6 +31,7 @@ #include "QMCWaveFunctions/TWFFastDerivWrapper.h" #include "TWFGrads.hpp" #include "Utilities/RuntimeOptions.h" +#include "SPOSetT.h" /**@defgroup MBWfs Many-body wave function group * @brief Classes to handle many-body trial wave functions diff --git a/src/QMCWaveFunctions/VariableSet.h b/src/QMCWaveFunctions/VariableSet.h index 8845f9a7cd..c2c88a271c 100644 --- a/src/QMCWaveFunctions/VariableSet.h +++ b/src/QMCWaveFunctions/VariableSet.h @@ -12,322 +12,15 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// - #ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H #define QMCPLUSPLUS_OPTIMIZE_VARIABLESET_H -#include "config.h" -#include -#include -#include -#include -#include "Configuration.h" -namespace qmcplusplus -{ -class hdf_archive; -} +#include "Configuration.h" +#include "QMCWaveFunctions/VariableSetT.h" namespace optimize { -/** An enum useful for determining the type of parameter is being optimized. -* knowing this in the opt routine can reduce the computational load. -*/ -enum -{ - OTHER_P = 0, - LOGLINEAR_P, //B-spline Jastrows - LOGLINEAR_K, //K space Jastrows - LINEAR_P, //Multi-determinant coefficients - SPO_P, //SPO set Parameters - BACKFLOW_P //Backflow parameters -}; - -/** class to handle a set of variables that can be modified during optimizations - * - * A serialized container of named variables. 
- */ -struct VariableSet -{ - using real_type = qmcplusplus::QMCTraits::RealType; - - using pair_type = std::pair; - using index_pair_type = std::pair; - using iterator = std::vector::iterator; - using const_iterator = std::vector::const_iterator; - using size_type = std::vector::size_type; - - ///number of active variables - int num_active_vars; - /** store locator of the named variable - * - * if(Index[i] == -1), the named variable is not active - */ - std::vector Index; - std::vector NameAndValue; - std::vector ParameterType; - std::vector Recompute; - - ///default constructor - inline VariableSet() : num_active_vars(0) {} - ///viturval destructor for safety - virtual ~VariableSet() = default; - /** if any of Index value is not zero, return true - */ - inline bool is_optimizable() const { return num_active_vars > 0; } - ///return the number of active variables - inline int size_of_active() const { return num_active_vars; } - ///return the first const_iterator - inline const_iterator begin() const { return NameAndValue.begin(); } - ///return the last const_iterator - inline const_iterator end() const { return NameAndValue.end(); } - ///return the first iterator - inline iterator begin() { return NameAndValue.begin(); } - ///return the last iterator - inline iterator end() { return NameAndValue.end(); } - ///return the size - inline size_type size() const { return NameAndValue.size(); } - ///return the locator of the i-th Index - inline int where(int i) const { return Index[i]; } - /** return the iterator of a named parameter - * @param vname name of a parameter - * @return the locator of vname - * - * If vname is not found among the Names, return NameAndValue.end() - * so that ::end() member function can be used to validate the iterator. 
- */ - inline iterator find(const std::string& vname) - { - return std::find_if(NameAndValue.begin(), NameAndValue.end(), - [&vname](const auto& value) { return value.first == vname; }); - } - - /** return the Index vaule for the named parameter - * @param vname name of the variable - * - * If vname is not found in this variables, return -1; - */ - int getIndex(const std::string& vname) const; - - /* return the NameAndValue index for the named parameter - * @ param vname name of the variable - * - * Differs from getIndex by not relying on the indices cached in Index - * myVars[i] will always return the value of the parameter if it is stored - * regardless of whether or not the Index array has been correctly reset - * - * if vname is not found, return -1 - * - */ - inline int getLoc(const std::string& vname) const - { - int loc = 0; - while (loc != NameAndValue.size()) - { - if (NameAndValue[loc].first == vname) - return loc; - ++loc; - } - return -1; - } - - inline void insert(const std::string& vname, real_type v, bool enable = true, int type = OTHER_P) - { - iterator loc = find(vname); - int ind_loc = loc - NameAndValue.begin(); - if (loc == NameAndValue.end()) // && enable==true) - { - Index.push_back(ind_loc); - NameAndValue.push_back(pair_type(vname, v)); - ParameterType.push_back(index_pair_type(vname, type)); - Recompute.push_back(index_pair_type(vname, 1)); - } - //disable it if enable == false - if (!enable) - Index[ind_loc] = -1; - } - - inline void setParameterType(int type) - { - std::vector::iterator PTit(ParameterType.begin()), PTend(ParameterType.end()); - while (PTit != PTend) - { - (*PTit).second = type; - PTit++; - } - } - - inline void getParameterTypeList(std::vector& types) const - { - auto ptit(ParameterType.begin()), ptend(ParameterType.end()); - types.resize(ptend - ptit); - auto tit(types.begin()); - while (ptit != ptend) - (*tit++) = (*ptit++).second; - } - - - /** equivalent to std::map[string] operator - */ - inline real_type& 
operator[](const std::string& vname) - { - iterator loc = find(vname); - if (loc == NameAndValue.end()) - { - Index.push_back(-1); - NameAndValue.push_back(pair_type(vname, 0)); - ParameterType.push_back(index_pair_type(vname, 0)); - Recompute.push_back(index_pair_type(vname, 1)); - return NameAndValue.back().second; - } - return (*loc).second; - } - - - /** return the name of i-th variable - * @param i index - */ - const std::string& name(int i) const { return NameAndValue[i].first; } - - /** return the i-th value - * @param i index - */ - inline real_type operator[](int i) const { return NameAndValue[i].second; } - - /** assign the i-th value - * @param i index - */ - inline real_type& operator[](int i) { return NameAndValue[i].second; } - - /** get the i-th parameter's type - * @param i index - */ - inline int getType(int i) const { return ParameterType[i].second; } - - inline bool recompute(int i) const { return (Recompute[i].second == 1); } - - inline int& recompute(int i) { return Recompute[i].second; } - - inline void setComputed() - { - for (int i = 0; i < Recompute.size(); i++) - { - if (ParameterType[i].second == LOGLINEAR_P) - Recompute[i].second = 0; - else if (ParameterType[i].second == LOGLINEAR_K) - Recompute[i].second = 0; - else - Recompute[i].second = 1; - } - } - - inline void setRecompute() - { - for (int i = 0; i < Recompute.size(); i++) - Recompute[i].second = 1; - } - - /** clear the variable set - * - * Remove all the data. - */ - void clear(); - - /** insert a VariableSet to the list - * @param input variables - */ - void insertFrom(const VariableSet& input); - - /** sum together the values of the optimizable parameter values in - * two VariableSet objects, and set this object's values to equal them. 
- * @param first set of input variables - * @param second set of input variables - */ - void insertFromSum(const VariableSet& input_1, const VariableSet& input_2); - - /** take the difference (input_1-input_2) of values of the optimizable - * parameter values in two VariableSet objects, and set this object's - * values to equal them. - * @param first set of input variables - * @param second set of input variables - */ - void insertFromDiff(const VariableSet& input_1, const VariableSet& input_2); - - /** activate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, Index is updated - * - * The status of a variable that is not included in the [first,last) - * remains the same. - */ - template - void activate(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - iterator loc = find(*first++); - if (loc != NameAndValue.end()) - { - int i = loc - NameAndValue.begin(); - if (Index[i] < 0) - Index[i] = num_active_vars++; - } - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** deactivate variables for optimization - * @param first iterator of the first name - * @param last iterator of the last name - * @param reindex if true, the variales are removed and Index is updated - */ - template - void disable(ForwardIterator first, ForwardIterator last, bool reindex) - { - while (first != last) - { - int loc = find(*first++) - NameAndValue.begin(); - if (loc < NameAndValue.size()) - Index[loc] = -1; - } - if (reindex) - { - removeInactive(); - resetIndex(); - } - } - - /** reset Index - */ - void resetIndex(); - /** remove inactive variables and trim the internal data - */ - void removeInactive(); - - /** set the index table of this VariableSet - * @param selected input variables - * - * This VariableSet is a subset of selected. 
- */ - void getIndex(const VariableSet& selected); - - /** set default Indices, namely all the variables are active - */ - void setIndexDefault(); - - void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; - - // Save variational parameters to an HDF file - void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const; - - /// Read variational parameters from an HDF file. - /// This assumes VariableSet is already set up. - void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin); -}; +using VariableSet = VariableSetT; } // namespace optimize #endif diff --git a/src/QMCWaveFunctions/VariableSet.cpp b/src/QMCWaveFunctions/VariableSetT.cpp similarity index 81% rename from src/QMCWaveFunctions/VariableSet.cpp rename to src/QMCWaveFunctions/VariableSetT.cpp index 3528f8dd1e..6986bb6c14 100644 --- a/src/QMCWaveFunctions/VariableSet.cpp +++ b/src/QMCWaveFunctions/VariableSetT.cpp @@ -11,21 +11,23 @@ // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign ////////////////////////////////////////////////////////////////////////////////////// +#include "VariableSetT.h" -#include "VariableSet.h" -#include "io/hdf/hdf_archive.h" #include "Host/sysutil.h" -#include -#include +#include "io/hdf/hdf_archive.h" + +#include #include #include -#include +#include +#include using std::setw; namespace optimize { -void VariableSet::clear() +template +void VariableSetT::clear() { num_active_vars = 0; Index.clear(); @@ -34,7 +36,8 @@ void VariableSet::clear() ParameterType.clear(); } -void VariableSet::insertFrom(const VariableSet& input) +template +void VariableSetT::insertFrom(const VariableSetT& input) { for (int i = 0; i < input.size(); ++i) { @@ -52,7 +55,8 @@ void VariableSet::insertFrom(const VariableSet& input) num_active_vars = input.num_active_vars; } -void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& input_2) +template +void 
VariableSetT::insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2) { real_type sum_val; std::string vname; @@ -65,8 +69,9 @@ void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& i for (int i = 0; i < input_1.size(); ++i) { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be adding them. + // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // adding them. if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) throw std::runtime_error("Inconsistent parameters exist in the two provided " "variable sets."); @@ -81,8 +86,8 @@ void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& i Recompute.push_back(input_1.Recompute[i]); // We can reuse the above values, which aren't summed between the - // objects, but the parameter values themselves need to use the summed - // values. + // objects, but the parameter values themselves need to use the + // summed values. vname = input_1.NameAndValue[i].first; NameAndValue.push_back(pair_type(vname, sum_val)); } @@ -92,7 +97,8 @@ void VariableSet::insertFromSum(const VariableSet& input_1, const VariableSet& i num_active_vars = input_1.num_active_vars; } -void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& input_2) +template +void VariableSetT::insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2) { real_type diff_val; std::string vname; @@ -105,8 +111,9 @@ void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& for (int i = 0; i < input_1.size(); ++i) { - // Check that each of the equivalent variables in both VariableSet objects - // have the same name - otherwise we certainly shouldn't be subtracting them. 
+ // Check that each of the equivalent variables in both VariableSet + // objects have the same name - otherwise we certainly shouldn't be + // subtracting them. if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first) throw std::runtime_error("Inconsistent parameters exist in the two provided " "variable sets."); @@ -120,8 +127,8 @@ void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& ParameterType.push_back(input_1.ParameterType[i]); Recompute.push_back(input_1.Recompute[i]); - // We can reuse the above values, which aren't subtracted between the - // objects, but the parameter values themselves need to use the + // We can reuse the above values, which aren't subtracted between + // the objects, but the parameter values themselves need to use the // subtracted values. vname = input_1.NameAndValue[i].first; NameAndValue.push_back(pair_type(vname, diff_val)); @@ -132,7 +139,8 @@ void VariableSet::insertFromDiff(const VariableSet& input_1, const VariableSet& num_active_vars = input_1.num_active_vars; } -void VariableSet::removeInactive() +template +void VariableSetT::removeInactive() { std::vector valid(Index); std::vector acopy(NameAndValue); @@ -154,7 +162,8 @@ void VariableSet::removeInactive() } } -void VariableSet::resetIndex() +template +void VariableSetT::resetIndex() { num_active_vars = 0; for (int i = 0; i < Index.size(); ++i) @@ -163,7 +172,8 @@ void VariableSet::resetIndex() } } -void VariableSet::getIndex(const VariableSet& selected) +template +void VariableSetT::getIndex(const VariableSetT& selected) { num_active_vars = 0; for (int i = 0; i < NameAndValue.size(); ++i) @@ -174,7 +184,8 @@ void VariableSet::getIndex(const VariableSet& selected) } } -int VariableSet::getIndex(const std::string& vname) const +template +int VariableSetT::getIndex(const std::string& vname) const { int loc = 0; while (loc != NameAndValue.size()) @@ -186,13 +197,15 @@ int VariableSet::getIndex(const std::string& vname) const return -1; } 
-void VariableSet::setIndexDefault() +template +void VariableSetT::setIndexDefault() { for (int i = 0; i < Index.size(); ++i) Index[i] = i; } -void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) const +template +void VariableSetT::print(std::ostream& os, int leftPadSpaces, bool printHeader) const { std::string pad_str = std::string(leftPadSpaces, ' '); int max_name_len = 0; @@ -202,7 +215,8 @@ void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) c return e1.first.length() < e2.first.length(); })->first.length(); - int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading value, period, and exponent. + int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading + // value, period, and exponent. int max_type_len = 1; int max_recompute_len = 1; int max_use_len = 3; @@ -244,13 +258,15 @@ void VariableSet::print(std::ostream& os, int leftPadSpaces, bool printHeader) c } } -void VariableSet::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const +template +void VariableSetT::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const { hout.create(filename); // File Versioning // 1.0.0 Initial file version - // 1.1.0 Files could have object-specific data from OptimizableObject::read/writeVariationalParameters + // 1.1.0 Files could have object-specific data from + // OptimizableObject::read/writeVariationalParameters std::vector vp_file_version{1, 1, 0}; hout.write(vp_file_version, "version"); @@ -259,8 +275,9 @@ void VariableSet::writeToHDF(const std::string& filename, qmcplusplus::hdf_archi hout.push("name_value_lists"); - std::vector param_values; + std::vector param_values; std::vector param_names; + for (auto& pair_it : NameAndValue) { param_names.push_back(pair_it.first); @@ -272,7 +289,8 @@ void VariableSet::writeToHDF(const std::string& filename, qmcplusplus::hdf_archi hout.pop(); } -void VariableSet::readFromHDF(const std::string& 
filename, qmcplusplus::hdf_archive& hin) +template +void VariableSetT::readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin) { if (!hin.open(filename, H5F_ACC_RDONLY)) { @@ -292,7 +310,7 @@ void VariableSet::readFromHDF(const std::string& filename, qmcplusplus::hdf_arch throw std::runtime_error(err_msg.str()); } - std::vector param_values; + std::vector param_values; hin.read(param_values, "parameter_values"); std::vector param_names; @@ -310,5 +328,9 @@ void VariableSet::readFromHDF(const std::string& filename, qmcplusplus::hdf_arch hin.pop(); } +template class VariableSetT; +template class VariableSetT; +template class VariableSetT>; +template class VariableSetT>; } // namespace optimize diff --git a/src/QMCWaveFunctions/VariableSetT.h b/src/QMCWaveFunctions/VariableSetT.h new file mode 100644 index 0000000000..cbba3e198b --- /dev/null +++ b/src/QMCWaveFunctions/VariableSetT.h @@ -0,0 +1,336 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory +// Mark A. 
Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H +#define QMCPLUSPLUS_OPTIMIZE_VARIABLESETT_H +#include "config.h" +#include +#include +#include +#include +#include "OrbitalSetTraits.h" + +namespace qmcplusplus +{ +class hdf_archive; +} + +namespace optimize +{ +/** An enum useful for determining the type of parameter is being optimized. +* knowing this in the opt routine can reduce the computational load. +*/ +enum +{ + OTHER_P = 0, + LOGLINEAR_P, //B-spline Jastrows + LOGLINEAR_K, //K space Jastrows + LINEAR_P, //Multi-determinant coefficients + SPO_P, //SPO set Parameters + BACKFLOW_P //Backflow parameters +}; + +/** class to handle a set of variables that can be modified during optimizations + * + * A serialized container of named variables. 
+ */ +template +class VariableSetT +{ +public: + using value_type = typename qmcplusplus::OrbitalSetTraits::ValueType; + using real_type = typename qmcplusplus::OrbitalSetTraits::RealType; + + using pair_type = std::pair; + using index_pair_type = std::pair; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + using size_type = typename std::vector::size_type; + + ///number of active variables + int num_active_vars; + /** store locator of the named variable + * + * if(Index[i] == -1), the named variable is not active + */ + std::vector Index; + std::vector NameAndValue; + std::vector ParameterType; + std::vector Recompute; + + ///default constructor + inline VariableSetT() : num_active_vars(0) {} + ///virtual destructor for safety + virtual ~VariableSetT() = default; + /** if any of Index value is not zero, return true + */ + inline bool is_optimizable() const { return num_active_vars > 0; } + ///return the number of active variables + inline int size_of_active() const { return num_active_vars; } + ///return the first const_iterator + inline const_iterator begin() const { return NameAndValue.begin(); } + ///return the last const_iterator + inline const_iterator end() const { return NameAndValue.end(); } + ///return the first iterator + inline iterator begin() { return NameAndValue.begin(); } + ///return the last iterator + inline iterator end() { return NameAndValue.end(); } + ///return the size + inline size_type size() const { return NameAndValue.size(); } + ///return the locator of the i-th Index + inline int where(int i) const { return Index[i]; } + /** return the iterator of a named parameter + * @param vname name of a parameter + * @return the locator of vname + * + * If vname is not found among the Names, return NameAndValue.end() + * so that ::end() member function can be used to validate the iterator. 
+ */ + inline iterator find(const std::string& vname) + { + return std::find_if(NameAndValue.begin(), NameAndValue.end(), + [&vname](const auto& value) { return value.first == vname; }); + } + + /** return the Index value for the named parameter + * @param vname name of the variable + * + * If vname is not found in this variable set, return -1; + */ + int getIndex(const std::string& vname) const; + + /* return the NameAndValue index for the named parameter + * @ param vname name of the variable + * + * Differs from getIndex by not relying on the indices cached in Index + * myVars[i] will always return the value of the parameter if it is stored + * regardless of whether or not the Index array has been correctly reset + * + * if vname is not found, return -1 + * + */ + inline int getLoc(const std::string& vname) const + { + int loc = 0; + while (loc != NameAndValue.size()) + { + if (NameAndValue[loc].first == vname) + return loc; + ++loc; + } + return -1; + } + + inline void insert(const std::string& vname, real_type v, bool enable = true, int type = OTHER_P) + { + iterator loc = find(vname); + int ind_loc = loc - NameAndValue.begin(); + if (loc == NameAndValue.end()) // && enable==true) + { + Index.push_back(ind_loc); + NameAndValue.push_back(pair_type(vname, v)); + ParameterType.push_back(index_pair_type(vname, type)); + Recompute.push_back(index_pair_type(vname, 1)); + } + //disable it if enable == false + if (!enable) + Index[ind_loc] = -1; + } + + inline void setParameterType(int type) + { + std::vector::iterator PTit(ParameterType.begin()), PTend(ParameterType.end()); + while (PTit != PTend) + { + (*PTit).second = type; + PTit++; + } + } + + inline void getParameterTypeList(std::vector& types) const + { + auto ptit(ParameterType.begin()), ptend(ParameterType.end()); + types.resize(ptend - ptit); + auto tit(types.begin()); + while (ptit != ptend) + (*tit++) = (*ptit++).second; + } + + + /** equivalent to std::map[string] operator + */ + inline real_type& 
operator[](const std::string& vname) + { + iterator loc = find(vname); + if (loc == NameAndValue.end()) + { + Index.push_back(-1); + NameAndValue.push_back(pair_type(vname, 0)); + ParameterType.push_back(index_pair_type(vname, 0)); + Recompute.push_back(index_pair_type(vname, 1)); + return NameAndValue.back().second; + } + return (*loc).second; + } + + + /** return the name of i-th variable + * @param i index + */ + const std::string& name(int i) const { return NameAndValue[i].first; } + + /** return the i-th value + * @param i index + */ + inline real_type operator[](int i) const { return NameAndValue[i].second; } + + /** assign the i-th value + * @param i index + */ + inline real_type& operator[](int i) { return NameAndValue[i].second; } + + /** get the i-th parameter's type + * @param i index + */ + inline int getType(int i) const { return ParameterType[i].second; } + + inline bool recompute(int i) const { return (Recompute[i].second == 1); } + + inline int& recompute(int i) { return Recompute[i].second; } + + inline void setComputed() + { + for (int i = 0; i < Recompute.size(); i++) + { + if (ParameterType[i].second == LOGLINEAR_P) + Recompute[i].second = 0; + else if (ParameterType[i].second == LOGLINEAR_K) + Recompute[i].second = 0; + else + Recompute[i].second = 1; + } + } + + inline void setRecompute() + { + for (int i = 0; i < Recompute.size(); i++) + Recompute[i].second = 1; + } + + /** clear the variable set + * + * Remove all the data. + */ + void clear(); + + /** insert a VariableSet to the list + * @param input variables + */ + void insertFrom(const VariableSetT& input); + + /** sum together the values of the optimizable parameter values in + * two VariableSet objects, and set this object's values to equal them. 
+ * @param input_1 first set of input variables + * @param input_2 second set of input variables + */ + void insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2); + + /** take the difference (input_1-input_2) of values of the optimizable + * parameter values in two VariableSet objects, and set this object's + * values to equal them. + * @param input_1 first set of input variables + * @param input_2 second set of input variables + */ + void insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2); + + /** activate variables for optimization + * @param first iterator of the first name + * @param last iterator of the last name + * @param reindex if true, Index is updated + * + * The status of a variable that is not included in the [first,last) + * remains the same. + */ + template + void activate(ForwardIterator first, ForwardIterator last, bool reindex) + { + while (first != last) + { + iterator loc = find(*first++); + if (loc != NameAndValue.end()) + { + int i = loc - NameAndValue.begin(); + if (Index[i] < 0) + Index[i] = num_active_vars++; + } + } + if (reindex) + { + removeInactive(); + resetIndex(); + } + } + + /** deactivate variables for optimization + * @param first iterator of the first name + * @param last iterator of the last name + * @param reindex if true, the variables are removed and Index is updated + */ + template + void disable(ForwardIterator first, ForwardIterator last, bool reindex) + { + while (first != last) + { + int loc = find(*first++) - NameAndValue.begin(); + if (loc < NameAndValue.size()) + Index[loc] = -1; + } + if (reindex) + { + removeInactive(); + resetIndex(); + } + } + + /** reset Index + */ + void resetIndex(); + /** remove inactive variables and trim the internal data + */ + void removeInactive(); + + /** set the index table of this VariableSet + * @param selected input variables + * + * This VariableSet is a subset of selected. 
+ */ + void getIndex(const VariableSetT& selected); + + /** set default Indices, namely all the variables are active + */ + void setIndexDefault(); + + void print(std::ostream& os, int leftPadSpaces = 0, bool printHeader = false) const; + + // Save variational parameters to an HDF file + void writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const; + + /// Read variational parameters from an HDF file. + /// This assumes VariableSet is already set up. + void readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin); +}; +} // namespace optimize + +#endif diff --git a/src/QMCWaveFunctions/WaveFunctionPool.h b/src/QMCWaveFunctions/WaveFunctionPool.h index 36e562367b..81459ae6dc 100644 --- a/src/QMCWaveFunctions/WaveFunctionPool.h +++ b/src/QMCWaveFunctions/WaveFunctionPool.h @@ -21,13 +21,14 @@ #include "Message/MPIObjectBase.h" #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "Utilities/RuntimeOptions.h" +#include "Particle/ParticleSet.h" +#include "Particle/ParticleSetPool.h" + #include #include namespace qmcplusplus { -class ParticleSetPool; -class ParticleSet; /** @ingroup qmcapp * @brief Manage a collection of TrialWaveFunction objects diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt index 36c35cee7e..c0e7ead567 100644 --- a/src/QMCWaveFunctions/tests/CMakeLists.txt +++ b/src/QMCWaveFunctions/tests/CMakeLists.txt @@ -114,8 +114,10 @@ set(SPOSET_SRC test_CompositeSPOSet.cpp test_hybridrep.cpp test_pw.cpp - test_ConstantSPOSet.cpp test_LCAO_diamondC_2x1x1.cpp + test_ConstantSPOSetT.cpp + test_RotatedSPOsT.cpp + ${MO_SRCS}) if(NiO_a16_H5_FOUND) set(SPOSET_SRC ${SPOSET_SRC} test_einset_NiO_a16.cpp) @@ -144,17 +146,10 @@ set(DETERMINANT_SRC test_ci_configuration.cpp test_multi_slater_determinant.cpp) -add_library(sposets_for_testing FakeSPO.cpp ConstantSPOSet.cpp) +add_library(sposets_for_testing FakeSPOT.cpp FakeSPO.cpp ConstantSPOSetT.cpp) 
target_include_directories(sposets_for_testing PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(sposets_for_testing PUBLIC qmcwfs) -# @TODO: Remove when rotations work for complex stuff -if(NOT QMC_COMPLEX) - if(NOT ENABLE_CUDA) - set(SPOSET_SRC test_RotatedSPOs.cpp ${SPOSET_SRC}) - endif() -endif() - if(ENABLE_CUDA) set(DETERMINANT_SRC ${DETERMINANT_SRC} test_DiracMatrixComputeCUDA.cpp test_cuBLAS_LU.cpp) if(NOT QMC_CUDA2HIP) diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp deleted file mode 100644 index f9ad56e330..0000000000 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.cpp +++ /dev/null @@ -1,100 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2023 Raymond Clay and QMCPACK developers. -// -// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -// -// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "QMCWaveFunctions/tests/ConstantSPOSet.h" - -namespace qmcplusplus -{ -ConstantSPOSet::ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals) - : SPOSet(my_name), numparticles_(nparticles) -{ - OrbitalSetSize = norbitals; - ref_psi_.resize(numparticles_, OrbitalSetSize); - ref_egrad_.resize(numparticles_, OrbitalSetSize); - ref_elapl_.resize(numparticles_, OrbitalSetSize); - - ref_psi_ = 0.0; - ref_egrad_ = 0.0; - ref_elapl_ = 0.0; -}; - -std::unique_ptr ConstantSPOSet::makeClone() const -{ - auto myclone = std::make_unique(my_name_, numparticles_, OrbitalSetSize); - myclone->setRefVals(ref_psi_); - myclone->setRefEGrads(ref_egrad_); - myclone->setRefELapls(ref_elapl_); - return myclone; -}; - -std::string 
ConstantSPOSet::getClassName() const { return "ConstantSPOSet"; }; - -void ConstantSPOSet::checkOutVariables(const opt_variables_type& active) -{ - APP_ABORT("ConstantSPOSet should not call checkOutVariables"); -}; - -void ConstantSPOSet::setOrbitalSetSize(int norbs) { APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); } - -void ConstantSPOSet::setRefVals(const ValueMatrix& vals) -{ - assert(vals.cols() == OrbitalSetSize); - assert(vals.rows() == numparticles_); - ref_psi_ = vals; -}; -void ConstantSPOSet::setRefEGrads(const GradMatrix& grads) -{ - assert(grads.cols() == OrbitalSetSize); - assert(grads.rows() == numparticles_); - ref_egrad_ = grads; -}; -void ConstantSPOSet::setRefELapls(const ValueMatrix& lapls) -{ - assert(lapls.cols() == OrbitalSetSize); - assert(lapls.rows() == numparticles_); - ref_elapl_ = lapls; -}; - -void ConstantSPOSet::evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) -{ - const auto* vp = dynamic_cast(&P); - int ptcl = vp ? vp->refPtcl : iat; - assert(psi.size() == OrbitalSetSize); - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - psi[iorb] = ref_psi_(ptcl, iorb); -}; - -void ConstantSPOSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) -{ - for (int iorb = 0; iorb < OrbitalSetSize; iorb++) - { - psi[iorb] = ref_psi_(iat, iorb); - dpsi[iorb] = ref_egrad_(iat, iorb); - d2psi[iorb] = ref_elapl_(iat, iorb); - } -}; - -void ConstantSPOSet::evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) -{ - for (int iat = first, i = 0; iat < last; ++iat, ++i) - { - ValueVector v(logdet[i], logdet.cols()); - GradVector g(dlogdet[i], dlogdet.cols()); - ValueVector l(d2logdet[i], d2logdet.cols()); - evaluateVGL(P, iat, v, g, l); - } -} -} //namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSet.h b/src/QMCWaveFunctions/tests/ConstantSPOSet.h index 
c42461856d..5e139d57ba 100644 --- a/src/QMCWaveFunctions/tests/ConstantSPOSet.h +++ b/src/QMCWaveFunctions/tests/ConstantSPOSet.h @@ -13,75 +13,12 @@ #ifndef QMCPLUSPLUS_CONSTANTSPOSET_H #define QMCPLUSPLUS_CONSTANTSPOSET_H -#include "QMCWaveFunctions/SPOSet.h" +#include "Configuration.h" +#include "QMCWaveFunctions/tests/ConstantSPOSetT.h" namespace qmcplusplus { -/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing value, gradients, and laplacians, etc., - * These values are accessed through standard SPOSet calls like evaluateValue, evaluateVGL, etc. - * Exists to provide deterministic and known output to objects requiring SPOSet evaluations. - * - */ -class ConstantSPOSet : public SPOSet -{ -public: - ConstantSPOSet(const std::string& my_name) = delete; - - //Constructor needs number of particles and number of orbitals. This is the minimum - //amount of information needed to sanely construct all data members and perform size - //checks later. - ConstantSPOSet(const std::string& my_name, const int nparticles, const int norbitals); - - std::unique_ptr makeClone() const override; - - std::string getClassName() const override; - - void checkOutVariables(const opt_variables_type& active) override; - - void setOrbitalSetSize(int norbs) override; - - /** - * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. - * @param Nelec x Nion ValueType matrix of \phi_j(r_i) - * @return void - */ - void setRefVals(const ValueMatrix& vals); - /** - * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in ref_egrad_. - * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) - * @return void - */ - void setRefEGrads(const GradMatrix& grads); - /** - * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix in ref_elapl_. 
- * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) - * @return void - */ - void setRefELapls(const ValueMatrix& lapls); - - void evaluateValue(const ParticleSet& P, int iat, ValueVector& psi) override; - - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; - - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override; - -private: - const int numparticles_; /// evaluate_notranspose arrays are nparticle x norb matrices. - /// To ensure consistent array sizing and enforcement, - /// we agree at construction how large these matrices will be. - /// norb is stored in SPOSet::OrbitalSetSize. - - //Value, electron gradient, and electron laplacian at "reference configuration". - //i.e. before any attempted moves. +using ConstantSPOSet = ConstantSPOSetT; - ValueMatrix ref_psi_; - GradMatrix ref_egrad_; - ValueMatrix ref_elapl_; -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp new file mode 100644 index 0000000000..3295b1ce71 --- /dev/null +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp @@ -0,0 +1,124 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 Raymond Clay and QMCPACK developers. 
+// +// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#include "ConstantSPOSetT.h" + +namespace qmcplusplus +{ + +template +ConstantSPOSetT::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals) + : SPOSetT(my_name), numparticles_(nparticles) +{ + this->OrbitalSetSize = norbitals; + ref_psi_.resize(numparticles_, this->OrbitalSetSize); + ref_egrad_.resize(numparticles_, this->OrbitalSetSize); + ref_elapl_.resize(numparticles_, this->OrbitalSetSize); + + ref_psi_ = 0.0; + ref_egrad_ = 0.0; + ref_elapl_ = 0.0; +} + +template +std::unique_ptr> ConstantSPOSetT::makeClone() const +{ + auto myclone = std::make_unique>(this->my_name_, numparticles_, this->OrbitalSetSize); + myclone->setRefVals(ref_psi_); + myclone->setRefEGrads(ref_egrad_); + myclone->setRefELapls(ref_elapl_); + return myclone; +} + +template +void ConstantSPOSetT::checkOutVariables(const OptVariablesTypeT& active) +{ + APP_ABORT("ConstantSPOSet should not call checkOutVariables"); +}; + +template +void ConstantSPOSetT::setOrbitalSetSize(int norbs) +{ + APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()"); +} + +template +void ConstantSPOSetT::setRefVals(const ValueMatrix& vals) +{ + assert(vals.cols() == this->OrbitalSetSize); + assert(vals.rows() == numparticles_); + ref_psi_ = vals; +} + +template +void ConstantSPOSetT::setRefEGrads(const GradMatrix& grads) +{ + assert(grads.cols() == this->OrbitalSetSize); + assert(grads.rows() == numparticles_); + ref_egrad_ = grads; +} + +template +void ConstantSPOSetT::setRefELapls(const ValueMatrix& lapls) +{ + assert(lapls.cols() == this->OrbitalSetSize); + assert(lapls.rows() == numparticles_); + ref_elapl_ = lapls; +} + +template +void ConstantSPOSetT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& 
psi) +{ + const auto* vp = dynamic_cast*>(&P); + int ptcl = vp ? vp->refPtcl : iat; + assert(psi.size() == this->OrbitalSetSize); + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + psi[iorb] = ref_psi_(ptcl, iorb); +} + +template +void ConstantSPOSetT::evaluateVGL(const ParticleSetT& P, + int iat, + ValueVector& psi, + GradVector& dpsi, + ValueVector& d2psi) +{ + for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) + { + psi[iorb] = ref_psi_(iat, iorb); + dpsi[iorb] = ref_egrad_(iat, iorb); + d2psi[iorb] = ref_elapl_(iat, iorb); + } +} + +template +void ConstantSPOSetT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + for (int iat = first, i = 0; iat < last; ++iat, ++i) + { + ValueVector v(logdet[i], logdet.cols()); + GradVector g(dlogdet[i], dlogdet.cols()); + ValueVector l(d2logdet[i], d2logdet.cols()); + evaluateVGL(P, iat, v, g, l); + } +} + +template class ConstantSPOSetT; +template class ConstantSPOSetT; +template class ConstantSPOSetT>; +template class ConstantSPOSetT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h new file mode 100644 index 0000000000..6f1c3b6204 --- /dev/null +++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h @@ -0,0 +1,96 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 Raymond Clay and QMCPACK developers. 
+// +// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_CONSTANTSPOSETT_H +#define QMCPLUSPLUS_CONSTANTSPOSETT_H + +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +/** Constant SPOSet for testing purposes. Fixed N_elec x N_orb matrices storing + * value, gradients, and laplacians, etc., These values are accessed through + * standard SPOSet calls like evaluateValue, evaluateVGL, etc. Exists to provide + * deterministic and known output to objects requiring SPOSet evaluations. + * + */ +template +class ConstantSPOSetT : public SPOSetT +{ +public: + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradMatrix = typename SPOSetT::GradMatrix; + using ValueVector = typename SPOSetT::ValueVector; + using GradVector = typename SPOSetT::GradVector; + + ConstantSPOSetT(const std::string& my_name) = delete; + + // Constructor needs number of particles and number of orbitals. This is + // the minimum amount of information needed to sanely construct all data + // members and perform size checks later. + ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals); + + std::unique_ptr> makeClone() const final; + + std::string getClassName() const final { return "ConstantSPOSet"; }; + + void checkOutVariables(const OptVariablesTypeT& active) final; + + void setOrbitalSetSize(int norbs) final; + + /** + * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_. + * @param vals Nelec x Norb ValueType matrix of \phi_j(r_i) + * @return void + */ + void setRefVals(const ValueMatrix& vals); + /** + * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in + * ref_egrad_. 
+ * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i) + * @return void + */ + void setRefEGrads(const GradMatrix& grads); + /** + * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix + * in ref_elapl_. + * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i) + * @return void + */ + void setRefELapls(const ValueMatrix& lapls); + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) final; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) final; + +private: + const int numparticles_; /// evaluate_notranspose arrays are nparticle x + /// norb matrices. To ensure consistent array + /// sizing and enforcement, we agree at + /// construction how large these matrices will be. + /// norb is stored in SPOSet::OrbitalSetSize. + + // Value, electron gradient, and electron laplacian at "reference + // configuration". i.e. before any attempted moves. + + ValueMatrix ref_psi_; + GradMatrix ref_egrad_; + ValueMatrix ref_elapl_; +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp new file mode 100644 index 0000000000..7fc1bf7611 --- /dev/null +++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp @@ -0,0 +1,159 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#include "FakeSPOT.h" + +namespace qmcplusplus +{ +template +FakeSPOT::FakeSPOT() : SPOSetT("one_FakeSPO") +{ + a.resize(3, 3); + + a(0, 0) = 2.3; + a(0, 1) = 4.5; + a(0, 2) = 2.6; + a(1, 0) = 0.5; + a(1, 1) = 8.5; + a(1, 2) = 3.3; + a(2, 0) = 1.8; + a(2, 1) = 4.4; + a(2, 2) = 4.9; + + v.resize(3); + v[0] = 1.9; + v[1] = 2.0; + v[2] = 3.1; + + a2.resize(4, 4); + a2(0, 0) = 2.3; + a2(0, 1) = 4.5; + a2(0, 2) = 2.6; + a2(0, 3) = 1.2; + a2(1, 0) = 0.5; + a2(1, 1) = 8.5; + a2(1, 2) = 3.3; + a2(1, 3) = 0.3; + a2(2, 0) = 1.8; + a2(2, 1) = 4.4; + a2(2, 2) = 4.9; + a2(2, 3) = 2.8; + a2(3, 0) = 0.8; + a2(3, 1) = 4.1; + a2(3, 2) = 3.2; + a2(3, 3) = 1.1; + + v2.resize(4, 4); + + v2(0, 0) = 3.2; + v2(0, 1) = 0.5; + v2(0, 2) = 5.9; + v2(0, 3) = 3.7; + v2(1, 0) = 0.3; + v2(1, 1) = 1.4; + v2(1, 2) = 3.9; + v2(1, 3) = 8.2; + v2(2, 0) = 3.3; + v2(2, 1) = 5.4; + v2(2, 2) = 4.9; + v2(2, 3) = 2.2; + v2(3, 1) = 5.4; + v2(3, 2) = 4.9; + v2(3, 3) = 2.2; + + gv.resize(4); + gv[0] = GradType(1.0, 0.0, 0.1); + gv[1] = GradType(1.0, 2.0, 0.1); + gv[2] = GradType(2.0, 1.0, 0.1); + gv[3] = GradType(0.4, 0.3, 0.1); +} +template +std::unique_ptr> FakeSPOT::makeClone() const +{ + return std::make_unique>(*this); +} + +template +void FakeSPOT::setOrbitalSetSize(int norbs) +{ + this->OrbitalSetSize = norbs; +} + +template +void FakeSPOT::evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) +{ + if (iat < 0) + for (int i = 0; i < psi.size(); i++) + psi[i] = 1.2 * i - i * i; + else if (this->OrbitalSetSize == 3) + for (int i = 0; i < 3; i++) + psi[i] = a(iat, i); + else if (this->OrbitalSetSize == 4) + for (int i = 0; i < 4; i++) + psi[i] = a2(iat, i); +} + +template +void FakeSPOT::evaluateVGL(const ParticleSetT& P, int iat, 
ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) +{ + if (this->OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + { + psi[i] = v[i]; + dpsi[i] = gv[i]; + } + } + else if (this->OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + { + psi[i] = v2(iat, i); + dpsi[i] = gv[i]; + } + } +} + +template +void FakeSPOT::evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) +{ + if (this->OrbitalSetSize == 3) + { + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + { + logdet(j, i) = a(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } + else if (this->OrbitalSetSize == 4) + { + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + { + logdet(j, i) = a2(i, j); + dlogdet[i][j] = gv[j] + GradType(i); + } + } +} + +// Class concrete types from ValueType +template class FakeSPOT; +template class FakeSPOT; +template class FakeSPOT>; +template class FakeSPOT>; + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h new file mode 100644 index 0000000000..79f1db1d24 --- /dev/null +++ b/src/QMCWaveFunctions/tests/FakeSPOT.h @@ -0,0 +1,61 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2020 QMCPACK developers. 
+// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_FAKESPOTT_H +#define QMCPLUSPLUS_FAKESPOTT_H + +#include "QMCWaveFunctions/SPOSetT.h" + +namespace qmcplusplus +{ +template +class FakeSPOT : public SPOSetT +{ +public: + Matrix a; + Matrix a2; + Vector v; + Matrix v2; + + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + using GradType = typename SPOSetT::GradType; + + typename SPOSetT::GradVector gv; + + FakeSPOT(); + + ~FakeSPOT() override = default; + + std::string getClassName() const override { return "FakeSPO"; } + + std::unique_ptr> makeClone() const override; + + virtual void report() {} + + void setOrbitalSetSize(int norbs) override; + + void evaluateValue(const ParticleSetT& P, int iat, ValueVector& psi) override; + + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override; + + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override; +}; + +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp new file mode 100644 index 0000000000..56d5b22e8a --- /dev/null +++ b/src/QMCWaveFunctions/tests/test_ConstantSPOSetT.cpp @@ -0,0 +1,136 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. 
+// +// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +// +// File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + + +#include "catch.hpp" +#include "Configuration.h" +#include "QMCWaveFunctions/WaveFunctionTypes.hpp" +#include "QMCWaveFunctions/tests/ConstantSPOSetT.h" +#include "Utilities/for_testing/checkMatrix.hpp" +namespace qmcplusplus +{ +//Ray: Figure out how to template me on value type. +TEST_CASE("ConstantSPOSetT", "[wavefunction]") +{ + //For now, do a small square case. + const int nelec = 2; + const int norb = 2; + using WF = WaveFunctionTypes; + using Real = WF::Real; + using Value = WF::Value; + using Grad = WF::Grad; + using ValueVector = Vector; + using GradVector = Vector; + using ValueMatrix = Matrix; + using GradMatrix = Matrix; + + ValueVector row0{Value(0.92387953), Value(0.92387953)}; + ValueVector row1{Value(0.29131988), Value(0.81078057)}; + + GradVector grow0{Grad({-2.22222, -1.11111, 0.33333}), Grad({8.795388, -0.816057, -0.9238793})}; + GradVector grow1{Grad({2.22222, 1.11111, -0.33333}), Grad({-8.795388, 0.816057, 0.9238793})}; + + ValueVector lrow0{Value(-0.2234545), Value(0.72340234)}; + ValueVector lrow1{Value(-12.291810), Value(6.879057)}; + + + ValueMatrix spomat; + GradMatrix gradspomat; + ValueMatrix laplspomat; + + spomat.resize(nelec, norb); + gradspomat.resize(nelec, norb); + laplspomat.resize(nelec, norb); + + for (int iorb = 0; iorb < norb; iorb++) + { + spomat(0, iorb) = row0[iorb]; + spomat(1, iorb) = row1[iorb]; + + gradspomat(0, iorb) = grow0[iorb]; + gradspomat(1, iorb) = grow1[iorb]; + + laplspomat(0, iorb) = lrow0[iorb]; + laplspomat(1, iorb) = lrow1[iorb]; + } + + + const SimulationCellT simulation_cell; + ParticleSetT elec(simulation_cell); + + elec.create({nelec}); + + ValueVector psiV = {0.0, 0.0}; + ValueVector psiL = {0.0, 0.0}; + GradVector psiG; + 
psiG.resize(norb); + + //Test of value only constructor. + auto sposet = std::make_unique>("constant_spo", nelec, norb); + sposet->setRefVals(spomat); + sposet->setRefEGrads(gradspomat); + sposet->setRefELapls(laplspomat); + + sposet->evaluateValue(elec, 0, psiV); + + CHECK(psiV[0] == row0[0]); + CHECK(psiV[1] == row0[1]); + + + psiV = 0.0; + + sposet->evaluateValue(elec, 1, psiV); + CHECK(psiV[0] == row1[0]); + CHECK(psiV[1] == row1[1]); + + psiV = 0.0; + + sposet->evaluateVGL(elec, 1, psiV, psiG, psiL); + + for (int iorb = 0; iorb < norb; iorb++) + { + CHECK(psiV[iorb] == row1[iorb]); + CHECK(psiL[iorb] == lrow1[iorb]); + + for (int idim = 0; idim < OHMMS_DIM; idim++) + CHECK(psiG[iorb][idim] == grow1[iorb][idim]); + } + //Test of evaluate_notranspose. + ValueMatrix phimat, lphimat; + GradMatrix gphimat; + phimat.resize(nelec, norb); + gphimat.resize(nelec, norb); + lphimat.resize(nelec, norb); + + const int first_index = 0; //Only 2 electrons in this case. + const int last_index = 2; + sposet->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat); + + checkMatrix(phimat, spomat); + checkMatrix(lphimat, laplspomat); + + //Test of makeClone() + auto sposet_vgl2 = sposet->makeClone(); + phimat = 0.0; + gphimat = 0.0; + lphimat = 0.0; + + sposet_vgl2->evaluate_notranspose(elec, first_index, last_index, phimat, gphimat, lphimat); + + checkMatrix(phimat, spomat); + checkMatrix(lphimat, laplspomat); + + //Lastly, check if name is correct. 
+ std::string myname = sposet_vgl2->getClassName(); + std::string targetstring("ConstantSPOSet"); + CHECK(myname == targetstring); +} +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp b/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp index 4ce591df94..aa5149ee9f 100644 --- a/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp +++ b/src/QMCWaveFunctions/tests/test_DiracDeterminantBatched.cpp @@ -776,7 +776,7 @@ void test_DiracDeterminantBatched_spinor_update(const int delay_rank, DetMatInve //reject move and check for initial values for mw_evalGrad std::fill(grads.begin(), grads.end(), 0); - elec_.mw_accept_rejectMove(p_ref_list, 1, {false, false}); + elec_.mw_accept_rejectMoveT(p_ref_list, 1, {false, false}); dd.mw_evalGrad(dd_ref_list, p_ref_list, 1, grads); for (int iw = 0; iw < grads.size(); iw++) { @@ -798,7 +798,7 @@ void test_DiracDeterminantBatched_spinor_update(const int delay_rank, DetMatInve //now make and accept move, checking new values elec_.mw_makeMove(p_ref_list, 1, displs); - elec_.mw_accept_rejectMove(p_ref_list, 1, {true, true}); + elec_.mw_accept_rejectMoveT(p_ref_list, 1, {true, true}); G = 0; L = 0; diff --git a/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp b/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp index 14ee906568..427a41b88f 100644 --- a/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp +++ b/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp @@ -21,7 +21,8 @@ #include #include "QMCHamiltonians/NLPPJob.h" #include "DistanceTable.h" - +#include "QMCWaveFunctions/SPOSet.h" +#include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" #include #include #include @@ -337,9 +338,10 @@ void test_LCAO_DiamondC_2x1x1_real() ratios_list[iw].resize(nvp_list[iw]); // just need dummy refvec with correct size - SPOSet::ValueVector tmp_psi_list(norb); + SPOSet::ValueVector tmp_psi_list(norb), tmp_psi_list_2(norb); spo->mw_evaluateDetRatios(spo_list, 
RefVectorWithLeader(VP_, {VP_, VP_2}), - RefVector{tmp_psi_list}, invRow_ptr_list, ratios_list); + RefVector{tmp_psi_list, tmp_psi_list_2}, invRow_ptr_list, + ratios_list); std::vector ratios_ref_0(nvp_); std::vector ratios_ref_1(nvp_2); diff --git a/src/QMCWaveFunctions/tests/test_MO.cpp b/src/QMCWaveFunctions/tests/test_MO.cpp index 95539da705..24f31800a9 100644 --- a/src/QMCWaveFunctions/tests/test_MO.cpp +++ b/src/QMCWaveFunctions/tests/test_MO.cpp @@ -19,7 +19,8 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" -#include +#include "ResourceCollection.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp index 721a563349..1b8f323dec 100644 --- a/src/QMCWaveFunctions/tests/test_MO_spinor.cpp +++ b/src/QMCWaveFunctions/tests/test_MO_spinor.cpp @@ -20,6 +20,7 @@ #include "QMCWaveFunctions/SPOSetBuilderFactory.h" #include "Utilities/ResourceCollection.h" #include "QMCWaveFunctions/SpinorSet.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { @@ -114,11 +115,11 @@ void test_lcao_spinor() CHECK(d2psiM[iat][0] == ComplexApprox(vlp).epsilon(eps)); } - /** this is a somewhat simple example. We have an ion at the origin - * and a gaussian basis function centered on the ion as a orbital. - * In this case, the ion derivative is actually just the negative of - * the electron gradient. - */ + // this is a somewhat simple example. We have an ion at the origin + // and a gaussian basis function centered on the ion as a orbital. + // In this case, the ion derivative is actually just the negative of + // the electron gradient. 
+ SPOSet::GradMatrix gradIon(elec_.R.size(), spo->getOrbitalSetSize()); spo->evaluateGradSource(elec_, 0, elec_.R.size(), ions_, 0, gradIon); for (int iat = 0; iat < 1; iat++) @@ -146,9 +147,9 @@ void test_lcao_spinor() d2psi_work.resize(OrbitalSetSize); dspsi_work.resize(OrbitalSetSize); - //We worked hard to generate nice reference data above. Let's generate a test for evaluateV - //and evaluateVGL by perturbing the electronic configuration by dR, and then make - //single particle moves that bring it back to our original R reference values. + // We worked hard to generate nice reference data above. Let's generate a test for evaluateV + // and evaluateVGL by perturbing the electronic configuration by dR, and then make + // single particle moves that bring it back to our original R reference values. //Our perturbation vector. ParticleSet::ParticlePos dR; @@ -164,7 +165,7 @@ void test_lcao_spinor() elec_.R = Rnew; elec_.update(); - //Now we test evaluateValue() + // Now we test evaluateValue() for (unsigned int iat = 0; iat < 1; iat++) { psi_work = 0.0; @@ -175,7 +176,7 @@ void test_lcao_spinor() elec_.rejectMove(iat); } - //Now we test evaluateVGL() + // Now we test evaluateVGL() for (unsigned int iat = 0; iat < 1; iat++) { psi_work = 0.0; @@ -195,7 +196,7 @@ void test_lcao_spinor() elec_.rejectMove(iat); } - //Now we test evaluateSpin: + // Now we test evaluateSpin: for (unsigned int iat = 0; iat < 1; iat++) { @@ -217,11 +218,11 @@ void test_lcao_spinor() elec_.R = Rnew; elec_.update(); - //make a spin displacement, just set to zero for the test + // make a spin displacement, just set to zero for the test ParticleSet::ParticleScalar dS; dS.resize(1); - //now create second walker + // now create second walker ParticleSet elec_2(elec_); elec_2.R[0] = {-0.4, 1.5, -0.2}; elec_2.spins[0] = -1.3; @@ -248,8 +249,8 @@ void test_lcao_spinor() spo_list.push_back(*spo); spo_list.push_back(*spo_2); - //test resource APIs - //First resource is created, and then passed to the 
colleciton so it should be null + // test resource APIs + // First resource is created, and then passed to the colleciton so it should be null ResourceCollection spo_res("test_spo_res"); spo->createResource(spo_res); SpinorSet& spinor = spo_list.getCastedLeader(); @@ -275,13 +276,13 @@ void test_lcao_spinor() spo->mw_evaluate_notranspose(spo_list, p_list, 0, 1, logdet_list, dlogdet_list, d2logdet_list); for (unsigned int iat = 0; iat < 1; iat++) { - //walker 0 + // walker 0 CHECK(logdet_list[0].get()[iat][0] == ComplexApprox(val).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][0] == ComplexApprox(vdx).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][1] == ComplexApprox(vdy).epsilon(eps)); CHECK(dlogdet_list[0].get()[iat][0][2] == ComplexApprox(vdz).epsilon(eps)); CHECK(d2logdet_list[0].get()[iat][0] == ComplexApprox(vlp).epsilon(eps)); - //walker 1 + // walker 1 CHECK(logdet_list[1].get()[iat][0] == ComplexApprox(val2).epsilon(eps)); CHECK(dlogdet_list[1].get()[iat][0][0] == ComplexApprox(vdx2).epsilon(eps)); CHECK(dlogdet_list[1].get()[iat][0][1] == ComplexApprox(vdy2).epsilon(eps)); @@ -289,7 +290,7 @@ void test_lcao_spinor() CHECK(d2logdet_list[1].get()[iat][0] == ComplexApprox(vlp2).epsilon(eps)); } - //first, lets displace all the elec in each walker + // first, lets displace all the elec in each walker for (int iat = 0; iat < 1; iat++) { MCCoords displs(2); @@ -297,7 +298,7 @@ void test_lcao_spinor() displs.spins = {dS[iat], dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -310,10 +311,10 @@ void test_lcao_spinor() RefVector d2psi_v_list = {d2psi_work, d2psi_work_2}; SPOSet::OffloadMatrix mw_dspin; mw_dspin.resize(2, OrbitalSetSize); - //check mw_evaluateVGLWithSpin + // check mw_evaluateVGLWithSpin for (int iat = 0; iat < 1; iat++) { - //reset values to zero, updates the ref vectors to 
zero as well + // reset values to zero, updates the ref vectors to zero as well psi_work = 0.0; dpsi_work = 0.0; d2psi_work = 0.0; @@ -327,14 +328,14 @@ void test_lcao_spinor() displs.spins = {-dS[iat], -dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); spo->mw_evaluateVGLWithSpin(spo_list, p_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin); - //walker 0 + // walker 0 CHECK(psi_v_list[0].get()[0] == ComplexApprox(val).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][0] == ComplexApprox(vdx).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][1] == ComplexApprox(vdy).epsilon(eps)); CHECK(dpsi_v_list[0].get()[0][2] == ComplexApprox(vdz).epsilon(eps)); CHECK(d2psi_v_list[0].get()[0] == ComplexApprox(vlp).epsilon(eps)); CHECK(mw_dspin(0, 0) == ComplexApprox(vds).epsilon(eps)); - //walker 1 + // walker 1 CHECK(psi_v_list[1].get()[0] == ComplexApprox(val2).epsilon(eps)); CHECK(dpsi_v_list[1].get()[0][0] == ComplexApprox(vdx2).epsilon(eps)); CHECK(dpsi_v_list[1].get()[0][1] == ComplexApprox(vdy2).epsilon(eps)); @@ -343,7 +344,7 @@ void test_lcao_spinor() CHECK(mw_dspin(1, 0) == ComplexApprox(vds2).epsilon(eps)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } @@ -621,7 +622,7 @@ void test_lcao_spinor_excited() displs.spins = {dS[iat], dS[iat]}; elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -666,7 +667,7 @@ void test_lcao_spinor_excited() CHECK(d2psi_v_list[1].get()[0] == ComplexApprox(vlp2).epsilon(eps)); CHECK(mw_dspin(1, 0) == ComplexApprox(vds2).epsilon(eps)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp 
deleted file mode 100644 index af6f5b9cf0..0000000000 --- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp +++ /dev/null @@ -1,868 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2022 QMCPACK developers. -// -// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories -// -// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories -////////////////////////////////////////////////////////////////////////////////////// - -#include "catch.hpp" - -#include "type_traits/template_types.hpp" -#include "type_traits/ConvertToReal.h" -#include "OhmmsData/Libxml2Doc.h" -#include "OhmmsPETE/OhmmsMatrix.h" -#include "Particle/ParticleSet.h" -#include "Particle/ParticleSetPool.h" -#include "QMCWaveFunctions/WaveFunctionComponent.h" -#include "BsplineFactory/EinsplineSetBuilder.h" -#include "QMCWaveFunctions/RotatedSPOs.h" -#include "checkMatrix.hpp" -#include "FakeSPO.h" -#include - -#include -#include -#include - -using std::string; - -namespace qmcplusplus -{ -/* - JPT 04.01.2022: Adapted from test_einset.cpp - Test the spline rotated machinery for SplineR2R (extend to others later). -*/ -TEST_CASE("RotatedSPOs via SplineR2R", "[wavefunction]") -{ - using RealType = QMCTraits::RealType; - - /* - BEGIN Boilerplate stuff to make a simple SPOSet. Copied from test_einset.cpp - */ - - Communicate* c = OHMMS::Controller; - - // We get a "Mismatched supercell lattices" error due to default ctor? - ParticleSet::ParticleLayout lattice; - - // diamondC_1x1x1 - lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, 3.37316115, 0.0, 3.37316115}; - - ParticleSetPool ptcl = ParticleSetPool(c); - ptcl.setSimulationCell(lattice); - // LAttice seems fine after this point... 
- - auto ions_uptr = std::make_unique(ptcl.getSimulationCell()); - auto elec_uptr = std::make_unique(ptcl.getSimulationCell()); - ParticleSet& ions_(*ions_uptr); - ParticleSet& elec_(*elec_uptr); - - ions_.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions_.create({2}); - ions_.R[0] = {0.0, 0.0, 0.0}; - ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; - elec_.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec_.create({2}); - elec_.R[0] = {0.0, 0.0, 0.0}; - elec_.R[1] = {0.0, 1.0, 0.0}; - SpeciesSet& tspecies = elec_.getSpeciesSet(); - int upIdx = tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - //diamondC_1x1x1 - 8 bands available - const char* particles = R"( - - -)"; - - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); - - xmlNodePtr root = doc.getRoot(); - - xmlNodePtr ein1 = xmlFirstElementChild(root); - - EinsplineSetBuilder einSet(elec_, ptcl.getPool(), c, ein1); - auto spo = einSet.createSPOSetFromXML(ein1); - REQUIRE(spo); - - /* - END Boilerplate stuff. Now we have a SplineR2R wavefunction - ready for rotation. What follows is the actual test. - */ - - // SplineR2R only for the moment, so skip if QMC_COMPLEX is set -#if !defined(QMC_COMPLEX) - - spo->storeParamsBeforeRotation(); - // 1.) Make a RotatedSPOs object so that we can use the rotation routines - auto rot_spo = std::make_unique("one_rotated_set", std::move(spo)); - - // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. - const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 8); - - // 2.) Get data for unrotated orbitals. 
Check that there's no rotation - rot_spo->buildOptVariables(elec_.R.size()); - SPOSet::ValueMatrix psiM_bare(elec_.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_bare(elec_.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_bare(elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // This stuff checks that no rotation was applied. Copied from test_einset.cpp. - // value - CHECK(std::real(psiM_bare[1][0]) == Approx(-0.8886948824)); - CHECK(std::real(psiM_bare[1][1]) == Approx(1.4194120169)); - // grad - CHECK(std::real(dpsiM_bare[1][0][0]) == Approx(-0.0000183403)); - CHECK(std::real(dpsiM_bare[1][0][1]) == Approx(0.1655139178)); - CHECK(std::real(dpsiM_bare[1][0][2]) == Approx(-0.0000193077)); - CHECK(std::real(dpsiM_bare[1][1][0]) == Approx(-1.3131694794)); - CHECK(std::real(dpsiM_bare[1][1][1]) == Approx(-1.1174004078)); - CHECK(std::real(dpsiM_bare[1][1][2]) == Approx(-0.8462534547)); - // lapl - CHECK(std::real(d2psiM_bare[1][0]) == Approx(1.3313053846)); - CHECK(std::real(d2psiM_bare[1][1]) == Approx(-4.712583065)); - - /* - 3.) Apply a rotation to the orbitals - To do this, construct a params vector and call the - RotatedSPOs::apply_rotation(params) method. That should do the - right thing for this particular spline class. - - For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params. - */ - const auto rot_size = rot_spo->m_act_rot_inds.size(); - REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 - std::vector param(rot_size); - for (auto i = 0; i < rot_size; i++) - { - param[i] = 0.01 * static_cast(i); - } - rot_spo->apply_rotation(param, false); // Expect this to call SplineR2R::applyRotation() - - // 4.) Get data for rotated orbitals. 
- SPOSet::ValueMatrix psiM_rot(elec_.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_rot(elec_.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_rot(elec_.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); - - /* - Manually encode the unitary transformation. Ugly, but it works. - @TODO: Use the total rotation machinery when it's implemented - - NB: This is truncated to 5 sig-figs, so there is some slop here as - compared to what is done in the splines via apply_rotation(). - So below we reduce the threshold for comparison. This can - probably be ditched once we have a way to grab the actual - rotation matrix... - */ - SPOSet::ValueMatrix rot_mat(orbitalsetsize, orbitalsetsize); - rot_mat[0][0] = 0.99726; - rot_mat[0][1] = -0.00722; - rot_mat[0][2] = 0.00014; - rot_mat[0][3] = -0.00982; - rot_mat[0][4] = -0.01979; - rot_mat[0][5] = -0.02976; - rot_mat[0][6] = -0.03972; - rot_mat[0][7] = -0.04969; - rot_mat[1][0] = -0.00722; - rot_mat[1][1] = 0.97754; - rot_mat[1][2] = -0.05955; - rot_mat[1][3] = -0.06945; - rot_mat[1][4] = -0.07935; - rot_mat[1][5] = -0.08925; - rot_mat[1][6] = -0.09915; - rot_mat[1][7] = -0.10905; - rot_mat[2][0] = -0.00014; - rot_mat[2][1] = 0.05955; - rot_mat[2][2] = 0.99821; - rot_mat[2][3] = -0.00209; - rot_mat[2][4] = -0.00239; - rot_mat[2][5] = -0.00269; - rot_mat[2][6] = -0.00299; - rot_mat[2][7] = -0.00329; - rot_mat[3][0] = 0.00982; - rot_mat[3][1] = 0.06945; - rot_mat[3][2] = -0.00209; - rot_mat[3][3] = 0.99751; - rot_mat[3][4] = -0.00289; - rot_mat[3][5] = -0.00329; - rot_mat[3][6] = -0.00368; - rot_mat[3][7] = -0.00408; - rot_mat[4][0] = 0.01979; - rot_mat[4][1] = 0.07935; - rot_mat[4][2] = -0.00239; - rot_mat[4][3] = -0.00289; - rot_mat[4][4] = 0.99661; - rot_mat[4][5] = -0.00388; - rot_mat[4][6] = -0.00438; - rot_mat[4][7] = -0.00488; - rot_mat[5][0] = 0.02976; - rot_mat[5][1] = 0.08925; - rot_mat[5][2] = -0.00269; - rot_mat[5][3] = -0.00329; - 
rot_mat[5][4] = -0.00388; - rot_mat[5][5] = 0.99552; - rot_mat[5][6] = -0.00508; - rot_mat[5][7] = -0.00568; - rot_mat[6][0] = 0.03972; - rot_mat[6][1] = 0.09915; - rot_mat[6][2] = -0.00299; - rot_mat[6][3] = -0.00368; - rot_mat[6][4] = -0.00438; - rot_mat[6][5] = -0.00508; - rot_mat[6][6] = 0.99422; - rot_mat[6][7] = -0.00647; - rot_mat[7][0] = 0.04969; - rot_mat[7][1] = 0.10905; - rot_mat[7][2] = -0.00329; - rot_mat[7][3] = -0.00408; - rot_mat[7][4] = -0.00488; - rot_mat[7][5] = -0.00568; - rot_mat[7][6] = -0.00647; - rot_mat[7][7] = 0.99273; - - // Now compute the expected values by hand using the transformation above - double val1 = 0.; - double val2 = 0.; - for (auto i = 0; i < rot_mat.size1(); i++) - { - val1 += psiM_bare[0][i] * rot_mat[i][0]; - val2 += psiM_bare[1][i] * rot_mat[i][0]; - } - - // value - CHECK(std::real(psiM_rot[0][0]) == Approx(val1)); - CHECK(std::real(psiM_rot[1][0]) == Approx(val2)); - - std::vector grad1(3); - std::vector grad2(3); - for (auto j = 0; j < grad1.size(); j++) - { - for (auto i = 0; i < rot_mat.size1(); i++) - { - grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; - grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; - } - } - - // grad - CHECK(dpsiM_rot[0][0][0] == Approx(grad1[0]).epsilon(0.0001)); - CHECK(dpsiM_rot[0][0][1] == Approx(grad1[1]).epsilon(0.0001)); - CHECK(dpsiM_rot[0][0][2] == Approx(grad1[2]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][0] == Approx(grad2[0]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][1] == Approx(grad2[1]).epsilon(0.0001)); - CHECK(dpsiM_rot[1][0][2] == Approx(grad2[2]).epsilon(0.0001)); - - double lap1 = 0.; - double lap2 = 0.; - for (auto i = 0; i < rot_mat.size1(); i++) - { - lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; - lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; - } - - // Lapl - CHECK(std::real(d2psiM_rot[0][0]) == Approx(lap1).epsilon(0.0001)); - CHECK(std::real(d2psiM_rot[1][0]) == Approx(lap2).epsilon(0.0001)); - -#endif -} - -TEST_CASE("RotatedSPOs createRotationIndices", "[wavefunction]") 
-{ - // No active-active or virtual-virtual rotations - // Only active-virtual - RotatedSPOs::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - CHECK(rot_ind.size() == 2); - - // Full rotation contains all rotations - // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 - RotatedSPOs::RotationIndices full_rot_ind; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind); - CHECK(full_rot_ind.size() == 3); - - nel = 2; - RotatedSPOs::RotationIndices rot_ind2; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind2); - CHECK(rot_ind2.size() == 2); - - RotatedSPOs::RotationIndices full_rot_ind2; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind2); - CHECK(full_rot_ind2.size() == 3); - - nmo = 4; - RotatedSPOs::RotationIndices rot_ind3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind3); - CHECK(rot_ind3.size() == 4); - - RotatedSPOs::RotationIndices full_rot_ind3; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind3); - CHECK(full_rot_ind3.size() == 6); -} - -TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - RotatedSPOs::RotationIndices rot_ind; - int nel = 1; - int nmo = 3; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - - ValueMatrix m3(nmo, nmo); - m3 = ValueType(0); - std::vector params = {0.1, 0.2}; - - RotatedSPOs::constructAntiSymmetricMatrix(rot_ind, params, m3); - - // clang-format off - std::vector expected_data = { 0.0, -0.1, -0.2, - 0.1, 0.0, 0.0, - 0.2, 0.0, 0.0 }; - // clang-format on - - ValueMatrix expected_m3(expected_data.data(), 3, 3); - - CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); } - - std::vector params_out(2); - RotatedSPOs::extractParamsFromAntiSymmetricMatrix(rot_ind, m3, params_out); - 
CHECK(params_out[0] == Approx(0.1)); - CHECK(params_out[1] == Approx(0.2)); -} - -// Expected values of the matrix exponential come from gen_matrix_ops.py -TEST_CASE("RotatedSPOs exponentiate matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - std::vector mat1_data = {0.0}; - SPOSet::ValueMatrix m1(mat1_data.data(), 1, 1); - RotatedSPOs::exponentiate_antisym_matrix(m1); - // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(m1(0, 0) == ValueApprox(1.0)); - - // clang-format off - std::vector mat2_data = { 0.0, -0.1, - 0.1, 0.0 }; - // clang-format on - - SPOSet::ValueMatrix m2(mat2_data.data(), 2, 2); - RotatedSPOs::exponentiate_antisym_matrix(m2); - - // clang-format off - std::vector expected_rot2 = { 0.995004165278026, -0.0998334166468282, - 0.0998334166468282, 0.995004165278026 }; - // clang-format on - - ValueMatrix expected_m2(expected_rot2.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); - CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - - - // clang-format off - std::vector m3_input_data = { 0.0, -0.3, -0.1, - 0.3, 0.0, -0.2, - 0.1, 0.2, 0.0 }; - - - std::vector expected_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, - 0.283164960565074, 0.935754803277919, -0.210191705950743, - 0.127334574917630, 0.180540076694398, 0.975290308953046 }; - - // clang-format on - - ValueMatrix m3(m3_input_data.data(), 3, 3); - ValueMatrix expected_m3(expected_rot3.data(), 3, 3); - - RotatedSPOs::exponentiate_antisym_matrix(m3); - - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true); - CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } -} - -TEST_CASE("RotatedSPOs log matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - std::vector mat1_data = {1.0}; - 
SPOSet::ValueMatrix m1(mat1_data.data(), 1, 1); - SPOSet::ValueMatrix out_m1(1, 1); - RotatedSPOs::log_antisym_matrix(m1, out_m1); - // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) - CHECK(out_m1(0, 0) == ValueApprox(0.0)); - - // clang-format off - std::vector start_rot2 = { 0.995004165278026, -0.0998334166468282, - 0.0998334166468282, 0.995004165278026 }; - - std::vector mat2_data = { 0.0, -0.1, - 0.1, 0.0 }; - // clang-format on - - ValueMatrix rot_m2(start_rot2.data(), 2, 2); - ValueMatrix out_m2(2, 2); - RotatedSPOs::log_antisym_matrix(rot_m2, out_m2); - - SPOSet::ValueMatrix m2(mat2_data.data(), 2, 2); - CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true); - CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } - - // clang-format off - std::vector start_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, - 0.283164960565074, 0.935754803277919, -0.210191705950743, - 0.127334574917630, 0.180540076694398, 0.975290308953046 }; - - std::vector m3_input_data = { 0.0, -0.3, -0.1, - 0.3, 0.0, -0.2, - 0.1, 0.2, 0.0 }; - // clang-format on - ValueMatrix rot_m3(start_rot3.data(), 3, 3); - ValueMatrix out_m3(3, 3); - RotatedSPOs::log_antisym_matrix(rot_m3, out_m3); - - SPOSet::ValueMatrix m3(m3_input_data.data(), 3, 3); - CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); - CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } -} - -// Test round trip A -> exp(A) -> log(exp(A)) -// The log is multi-valued so this test may fail if the rotation parameters are too large. 
-// The exponentials will be the same, though -// exp(log(exp(A))) == exp(A) -TEST_CASE("RotatedSPOs exp-log matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - RotatedSPOs::RotationIndices rot_ind; - int nel = 2; - int nmo = 4; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); - - std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; - - RotatedSPOs::constructAntiSymmetricMatrix(rot_ind, params4, rot_m4); - ValueMatrix orig_rot_m4 = rot_m4; - ValueMatrix out_m4(nmo, nmo); - - RotatedSPOs::exponentiate_antisym_matrix(rot_m4); - - RotatedSPOs::log_antisym_matrix(rot_m4, out_m4); - - CheckMatrixResult check_matrix_result4 = checkMatrix(out_m4, orig_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - - std::vector params4out(4); - RotatedSPOs::extractParamsFromAntiSymmetricMatrix(rot_ind, out_m4, params4out); - for (int i = 0; i < params4.size(); i++) - { - CHECK(params4[i] == Approx(params4out[i])); - } -} - -TEST_CASE("RotatedSPOs hcpBe", "[wavefunction]") -{ - using RealType = QMCTraits::RealType; - Communicate* c = OHMMS::Controller; - - ParticleSet::ParticleLayout lattice; - lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, - 0.00000000, 0.00000000, 0.00000000, 6.78114995}; - - ParticleSetPool ptcl = ParticleSetPool(c); - ptcl.setSimulationCell(lattice); - auto ions_uptr = std::make_unique(ptcl.getSimulationCell()); - auto elec_uptr = std::make_unique(ptcl.getSimulationCell()); - ParticleSet& ions(*ions_uptr); - ParticleSet& elec(*elec_uptr); - - ions.setName("ion"); - ptcl.addParticleSet(std::move(ions_uptr)); - ions.create({1}); - ions.R[0] = {0.0, 0.0, 0.0}; - - elec.setName("elec"); - ptcl.addParticleSet(std::move(elec_uptr)); - elec.create({1}); - elec.R[0] = {0.0, 0.0, 0.0}; - - SpeciesSet& tspecies = elec.getSpeciesSet(); - int upIdx = 
tspecies.addSpecies("u"); - int chargeIdx = tspecies.addAttribute("charge"); - tspecies(chargeIdx, upIdx) = -1; - - // Add the attribute save_coefs="yes" to the sposet_builder tag to generate the - // spline file for use in eval_bspline_spo.py - - const char* particles = R"( - - - -)"; - - Libxml2Document doc; - bool okay = doc.parseFromString(particles); - REQUIRE(okay); - - xmlNodePtr root = doc.getRoot(); - - xmlNodePtr sposet_builder = xmlFirstElementChild(root); - xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); - - EinsplineSetBuilder einSet(elec, ptcl.getPool(), c, sposet_builder); - auto spo = einSet.createSPOSetFromXML(sposet_ptr); - REQUIRE(spo); - - spo->storeParamsBeforeRotation(); - auto rot_spo = std::make_unique("one_rotated_set", std::move(spo)); - - // Sanity check for orbs. Expect 1 electron, 2 orbitals - const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); - REQUIRE(orbitalsetsize == 2); - - rot_spo->buildOptVariables(elec.R.size()); - - SPOSet::ValueMatrix psiM_bare(elec.R.size(), orbitalsetsize); - SPOSet::GradMatrix dpsiM_bare(elec.R.size(), orbitalsetsize); - SPOSet::ValueMatrix d2psiM_bare(elec.R.size(), orbitalsetsize); - rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - - // Values generated from eval_bspline_spo.py, the generate_point_values_hcpBe function - CHECK(std::real(psiM_bare[0][0]) == Approx(0.210221765375514)); - CHECK(std::real(psiM_bare[0][1]) == Approx(-2.984345024542937e-06)); - - CHECK(std::real(d2psiM_bare[0][0]) == Approx(5.303848362116568)); - - opt_variables_type opt_vars; - rot_spo->checkInVariablesExclusive(opt_vars); - opt_vars.resetIndex(); - rot_spo->checkOutVariables(opt_vars); - rot_spo->resetParametersExclusive(opt_vars); - - using ValueType = QMCTraits::ValueType; - Vector dlogpsi(1); - Vector dhpsioverpsi(1); - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - - CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); - 
CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); - - std::vector params = {0.1}; - rot_spo->apply_rotation(params, false); - - rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); - CHECK(std::real(psiM_bare[0][0]) == Approx(0.20917123424337608)); - CHECK(std::real(psiM_bare[0][1]) == Approx(-0.02099012652669549)); - - CHECK(std::real(d2psiM_bare[0][0]) == Approx(5.277362065087747)); - - dlogpsi[0] = 0.0; - dhpsioverpsi[0] = 0.0; - - rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); - CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); - CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); -} - -// Test construction of delta rotation -TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction]") -{ - using ValueType = SPOSet::ValueType; - using ValueMatrix = SPOSet::ValueMatrix; - - int nel = 2; - int nmo = 4; - RotatedSPOs::RotationIndices rot_ind; - RotatedSPOs::createRotationIndices(nel, nmo, rot_ind); - RotatedSPOs::RotationIndices full_rot_ind; - RotatedSPOs::createRotationIndicesFull(nel, nmo, full_rot_ind); - // rot_ind size is 4 and full rot_ind size is 6 - - ValueMatrix rot_m4(nmo, nmo); - rot_m4 = ValueType(0); - - // When comparing with gen_matrix_ops.py, be aware of the order of indices - // in full_rot - // rot_ind is (0,2) (0,3) (1,2) (1,3) - // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) - // The extra indices go at the back - std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; - std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; - std::vector new_params(6); - - RotatedSPOs::constructDeltaRotation(delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4); - - // clang-format off - std::vector rot_data4 = - { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, - -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, - 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, - 
0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; - // clang-format on - - ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); - - CheckMatrixResult check_matrix_result4 = checkMatrix(rot_m4, new_rot_m4, true); - CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } - - // Reminder: Ordering! - std::vector expected_new_param = {1.6813965019790489, 0.3623564254653294, -0.05486544454559908, - -0.20574472941408453, -0.9542513302873077, 0.27497788909911774}; - for (int i = 0; i < new_params.size(); i++) - CHECK(new_params[i] == Approx(expected_new_param[i])); - - - // Rotated back to original position - - std::vector new_params2(6); - std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; - RotatedSPOs::constructDeltaRotation(reverse_delta_params, new_params, rot_ind, full_rot_ind, new_params2, rot_m4); - for (int i = 0; i < new_params2.size(); i++) - CHECK(new_params2[i] == Approx(old_params[i])); -} - -namespace testing -{ -opt_variables_type& getMyVars(SPOSet& rot) { return rot.myVars; } -opt_variables_type& getMyVarsFull(RotatedSPOs& rot) { return rot.myVarsFull; } -std::vector>& getHistoryParams(RotatedSPOs& rot) { return rot.history_params_; } -} // namespace testing - -// Test using global rotation -TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction]") -{ - auto fake_spo = std::make_unique(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOs rot("fake_rot", std::move(fake_spo)); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSet vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); - - { - hdf_archive hout; - vs.writeToHDF("rot_vp.h5", hout); - - rot.writeVariationalParameters(hout); - } - - auto fake_spo2 = std::make_unique(); - fake_spo2->setOrbitalSetSize(4); - - RotatedSPOs rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); - - optimize::VariableSet vs2; - 
rot2.checkInVariablesExclusive(vs2); - - hdf_archive hin; - vs2.readFromHDF("rot_vp.h5", hin); - rot2.readVariationalParameters(hin); - - opt_variables_type& var = testing::getMyVars(rot2); - CHECK(var[0] == Approx(vs[0])); - CHECK(var[1] == Approx(vs[1])); - CHECK(var[2] == Approx(vs[2])); - CHECK(var[3] == Approx(vs[3])); - - opt_variables_type& full_var = testing::getMyVarsFull(rot2); - CHECK(full_var[0] == Approx(vs[0])); - CHECK(full_var[1] == Approx(vs[1])); - CHECK(full_var[2] == Approx(vs[2])); - CHECK(full_var[3] == Approx(vs[3])); - CHECK(full_var[4] == Approx(0.0)); - CHECK(full_var[5] == Approx(0.0)); -} - -// Test using history list. -TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction]") -{ - auto fake_spo = std::make_unique(); - fake_spo->setOrbitalSetSize(4); - RotatedSPOs rot("fake_rot", std::move(fake_spo)); - rot.set_use_global_rotation(false); - int nel = 2; - rot.buildOptVariables(nel); - - optimize::VariableSet vs; - rot.checkInVariablesExclusive(vs); - vs[0] = 0.1; - vs[1] = 0.15; - vs[2] = 0.2; - vs[3] = 0.25; - rot.resetParametersExclusive(vs); - - { - hdf_archive hout; - vs.writeToHDF("rot_vp_hist.h5", hout); - - rot.writeVariationalParameters(hout); - } - - auto fake_spo2 = std::make_unique(); - fake_spo2->setOrbitalSetSize(4); - - RotatedSPOs rot2("fake_rot", std::move(fake_spo2)); - rot2.buildOptVariables(nel); - - optimize::VariableSet vs2; - rot2.checkInVariablesExclusive(vs2); - - hdf_archive hin; - vs2.readFromHDF("rot_vp_hist.h5", hin); - rot2.readVariationalParameters(hin); - - opt_variables_type& var = testing::getMyVars(rot2); - CHECK(var[0] == Approx(vs[0])); - CHECK(var[1] == Approx(vs[1])); - CHECK(var[2] == Approx(vs[2])); - CHECK(var[3] == Approx(vs[3])); - - auto hist = testing::getHistoryParams(rot2); - REQUIRE(hist.size() == 1); - REQUIRE(hist[0].size() == 4); -} - -class DummySPOSetWithoutMW : public SPOSet -{ -public: - DummySPOSetWithoutMW(const std::string& my_name) : SPOSet(my_name) {} - void 
setOrbitalSetSize(int norbs) override {} - void evaluateValue(const ParticleSet& P, int iat, SPOSet::ValueVector& psi) override - { - assert(psi.size() == 3); - psi[0] = 123; - psi[1] = 456; - psi[2] = 789; - } - void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override {} - void evaluate_notranspose(const ParticleSet& P, - int first, - int last, - ValueMatrix& logdet, - GradMatrix& dlogdet, - ValueMatrix& d2logdet) override - {} - std::string getClassName() const override { return my_name_; } -}; - -class DummySPOSetWithMW : public DummySPOSetWithoutMW -{ -public: - DummySPOSetWithMW(const std::string& my_name) : DummySPOSetWithoutMW(my_name) {} - void mw_evaluateValue(const RefVectorWithLeader& spo_list, - const RefVectorWithLeader& P_list, - int iat, - const RefVector& psi_v_list) const override - { - for (auto& psi : psi_v_list) - { - assert(psi.get().size() == 3); - psi.get()[0] = 321; - psi.get()[1] = 654; - psi.get()[2] = 987; - } - } -}; - -TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction]") -{ - //checking that mw_ API works in RotatedSPOs and is not defaulting to - //SPOSet default implementation - { - //First check calling the mw_ APIs for RotatedSPOs, for which the - //underlying implementation just calls the underlying SPOSet mw_ API - //In the case that the underlying SPOSet doesn't specialize the mw_ API, - //the underlying SPOSet will fall back to the default SPOSet mw_, which is - //just a loop over the single walker API. 
- RotatedSPOs rot_spo0("rotated0", std::make_unique("no mw 0")); - RotatedSPOs rot_spo1("rotated1", std::make_unique("no mw 1")); - RefVectorWithLeader spo_list(rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock mw_sposet_lock(spo_res, spo_list); - - const SimulationCell simulation_cell; - ParticleSet elec0(simulation_cell); - ParticleSet elec1(simulation_cell); - RefVectorWithLeader p_list(elec0, {elec0, elec1}); - - SPOSet::ValueVector psi0(3); - SPOSet::ValueVector psi1(3); - RefVector psi_v_list{psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) - { - CHECK(psi_v_list[iw].get()[0] == Approx(123)); - CHECK(psi_v_list[iw].get()[1] == Approx(456)); - CHECK(psi_v_list[iw].get()[2] == Approx(789)); - } - } - { - //In the case that the underlying SPOSet DOES have mw_ specializations, - //we want to make sure that RotatedSPOs are triggering that appropriately - //This will mean that the underlying SPOSets will do the appropriate offloading - //To check this, DummySPOSetWithMW has an explicit mw_evaluateValue which sets - //different values than what gets set in evaluateValue. 
By doing this, - //we are ensuring that RotatedSPOs->mw_evaluaeValue is calling the specialization - //in the underlying SPO and not using the default SPOSet implementation which - //loops over single walker APIs (which have different values enforced in - // DummySPOSetWithoutMW - - RotatedSPOs rot_spo0("rotated0", std::make_unique("mw 0")); - RotatedSPOs rot_spo1("rotated1", std::make_unique("mw 1")); - RefVectorWithLeader spo_list(rot_spo0, {rot_spo0, rot_spo1}); - - ResourceCollection spo_res("test_rot_res"); - rot_spo0.createResource(spo_res); - ResourceCollectionTeamLock mw_sposet_lock(spo_res, spo_list); - - const SimulationCell simulation_cell; - ParticleSet elec0(simulation_cell); - ParticleSet elec1(simulation_cell); - RefVectorWithLeader p_list(elec0, {elec0, elec1}); - - SPOSet::ValueVector psi0(3); - SPOSet::ValueVector psi1(3); - RefVector psi_v_list{psi0, psi1}; - - rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); - for (int iw = 0; iw < spo_list.size(); iw++) - { - CHECK(psi_v_list[iw].get()[0] == Approx(321)); - CHECK(psi_v_list[iw].get()[1] == Approx(654)); - CHECK(psi_v_list[iw].get()[2] == Approx(987)); - } - } -} - -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp new file mode 100644 index 0000000000..27aff1ebfd --- /dev/null +++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp @@ -0,0 +1,909 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2022 QMCPACK developers. 
+// +// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories +// +// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories +////////////////////////////////////////////////////////////////////////////////////// + +#include "FakeSPOT.h" +#include "OhmmsData/Libxml2Doc.h" +#include "OhmmsPETE/OhmmsMatrix.h" +#include "Particle/ParticleSetPoolT.h" +#include "Particle/ParticleSetT.h" +#include "QMCWaveFunctions/EinsplineSetBuilderT.h" +#include "QMCWaveFunctions/RotatedSPOsT.h" +#include "QMCWaveFunctions/WaveFunctionComponent.h" +#include "catch.hpp" +#include "checkMatrix.hpp" +#include "type_traits/ConvertToReal.h" +#include "type_traits/template_types.hpp" +#include +#include + +#include +#include +#include + +using std::string; + +namespace qmcplusplus +{ +template +struct ValueApproxHelper +{ + using Type = Catch::Detail::Approx; +}; +template +struct ValueApproxHelper> +{ + using Type = Catch::Detail::ComplexApprox; +}; + +template +using ValueApprox = typename ValueApproxHelper::Type; + +namespace testing +{ +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } +OptVariablesTypeT& getMyVars(SPOSetT& rot) { return rot.myVars; } +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; } +OptVariablesTypeT& getMyVarsFull(RotatedSPOsT& rot) { return rot.myVarsFull; } +std::vector>& getHistoryParams(RotatedSPOsT& rot) { return rot.history_params_; } + +std::vector>& getHistoryParams(RotatedSPOsT& rot) { return rot.history_params_; } +} // namespace testing + +#ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION +using TestTypeList = std::tuple; +#else +using TestTypeList = std::tuple; +#endif +#else +using TestTypeList = std::tuple<>; +#endif + +/* + JPT 04.01.2022: Adapted from test_einset.cpp + Test the spline rotated machinery for SplineR2R (extend to others later). 
+*/ +TEMPLATE_LIST_TEST_CASE("RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList) +{ + using RealType = typename SPOSetT::RealType; + + /* + BEGIN Boilerplate stuff to make a simple SPOSet. Copied from + test_einset.cpp + */ + + Communicate* c = OHMMS::Controller; + + // We get a "Mismatched supercell lattices" error due to default ctor? + typename ParticleSetT::ParticleLayout lattice; + + // diamondC_1x1x1 + lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, 3.37316115, 0.0, 3.37316115}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + // LAttice seems fine after this point... + + auto ions_uptr = std::make_unique>(ptcl.getSimulationCell()); + auto elec_uptr = std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions_(*ions_uptr); + ParticleSetT& elec_(*elec_uptr); + + ions_.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions_.create({2}); + ions_.R[0] = {0.0, 0.0, 0.0}; + ions_.R[1] = {1.68658058, 1.68658058, 1.68658058}; + elec_.setName("elec"); + ptcl.addParticleSet(std::move(elec_uptr)); + elec_.create({2}); + elec_.R[0] = {0.0, 0.0, 0.0}; + elec_.R[1] = {0.0, 1.0, 0.0}; + SpeciesSet& tspecies = elec_.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // diamondC_1x1x1 - 8 bands available + const char* particles = R"( + + +)"; + + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + REQUIRE(okay); + + xmlNodePtr root = doc.getRoot(); + + xmlNodePtr ein1 = xmlFirstElementChild(root); + + EinsplineSetBuilderT einSet(elec_, ptcl.getPool(), c, ein1); + auto spo = einSet.createSPOSetFromXML(ein1); + REQUIRE(spo); + + /* + END Boilerplate stuff. Now we have a SplineR2R wavefunction + ready for rotation. What follows is the actual test. 
+ */ + + // SplineR2R only for the moment, so skip if QMC_COMPLEX is set +#if !defined(QMC_COMPLEX) + + spo->storeParamsBeforeRotation(); + // 1.) Make a RotatedSPOs object so that we can use the rotation routines + auto rot_spo = std::make_unique>("one_rotated_set", std::move(spo)); + + // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb. + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 8); + + // 2.) Get data for unrotated orbitals. Check that there's no rotation + rot_spo->buildOptVariables(elec_.R.size()); + typename SPOSetT::ValueMatrix psiM_bare(elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare(elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare(elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // This stuff checks that no rotation was applied. Copied from + // test_einset.cpp. value + CHECK(std::real(psiM_bare[1][0]) == ValueApprox(-0.8886948824)); + CHECK(std::real(psiM_bare[1][1]) == ValueApprox(1.4194120169)); + // grad + CHECK(std::real(dpsiM_bare[1][0][0]) == ValueApprox(-0.0000183403)); + CHECK(std::real(dpsiM_bare[1][0][1]) == ValueApprox(0.1655139178)); + CHECK(std::real(dpsiM_bare[1][0][2]) == ValueApprox(-0.0000193077)); + CHECK(std::real(dpsiM_bare[1][1][0]) == ValueApprox(-1.3131694794)); + CHECK(std::real(dpsiM_bare[1][1][1]) == ValueApprox(-1.1174004078)); + CHECK(std::real(dpsiM_bare[1][1][2]) == ValueApprox(-0.8462534547)); + // lapl + CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox(1.3313053846)); + CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox(-4.712583065)); + + /* + 3.) Apply a rotation to the orbitals + To do this, construct a params vector and call the + RotatedSPOs::apply_rotation(params) method. That should do the + right thing for this particular spline class. + + For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params. 
+ */ + const auto rot_size = rot_spo->m_act_rot_inds.size(); + REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12 + std::vector param(rot_size); + for (auto i = 0; i < rot_size; i++) + { + param[i] = 0.01 * static_cast(i); + } + rot_spo->apply_rotation(param, false); // Expect this to call SplineR2R::applyRotation() + + // 4.) Get data for rotated orbitals. + typename SPOSetT::ValueMatrix psiM_rot(elec_.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_rot(elec_.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_rot(elec_.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot); + + /* + Manually encode the unitary transformation. Ugly, but it works. + @TODO: Use the total rotation machinery when it's implemented + + NB: This is truncated to 5 sig-figs, so there is some slop here as + compared to what is done in the splines via apply_rotation(). + So below we reduce the threshold for comparison. This can + probably be ditched once we have a way to grab the actual + rotation matrix... 
+ */ + typename SPOSetT::ValueMatrix rot_mat(orbitalsetsize, orbitalsetsize); + rot_mat[0][0] = 0.99726; + rot_mat[0][1] = -0.00722; + rot_mat[0][2] = 0.00014; + rot_mat[0][3] = -0.00982; + rot_mat[0][4] = -0.01979; + rot_mat[0][5] = -0.02976; + rot_mat[0][6] = -0.03972; + rot_mat[0][7] = -0.04969; + rot_mat[1][0] = -0.00722; + rot_mat[1][1] = 0.97754; + rot_mat[1][2] = -0.05955; + rot_mat[1][3] = -0.06945; + rot_mat[1][4] = -0.07935; + rot_mat[1][5] = -0.08925; + rot_mat[1][6] = -0.09915; + rot_mat[1][7] = -0.10905; + rot_mat[2][0] = -0.00014; + rot_mat[2][1] = 0.05955; + rot_mat[2][2] = 0.99821; + rot_mat[2][3] = -0.00209; + rot_mat[2][4] = -0.00239; + rot_mat[2][5] = -0.00269; + rot_mat[2][6] = -0.00299; + rot_mat[2][7] = -0.00329; + rot_mat[3][0] = 0.00982; + rot_mat[3][1] = 0.06945; + rot_mat[3][2] = -0.00209; + rot_mat[3][3] = 0.99751; + rot_mat[3][4] = -0.00289; + rot_mat[3][5] = -0.00329; + rot_mat[3][6] = -0.00368; + rot_mat[3][7] = -0.00408; + rot_mat[4][0] = 0.01979; + rot_mat[4][1] = 0.07935; + rot_mat[4][2] = -0.00239; + rot_mat[4][3] = -0.00289; + rot_mat[4][4] = 0.99661; + rot_mat[4][5] = -0.00388; + rot_mat[4][6] = -0.00438; + rot_mat[4][7] = -0.00488; + rot_mat[5][0] = 0.02976; + rot_mat[5][1] = 0.08925; + rot_mat[5][2] = -0.00269; + rot_mat[5][3] = -0.00329; + rot_mat[5][4] = -0.00388; + rot_mat[5][5] = 0.99552; + rot_mat[5][6] = -0.00508; + rot_mat[5][7] = -0.00568; + rot_mat[6][0] = 0.03972; + rot_mat[6][1] = 0.09915; + rot_mat[6][2] = -0.00299; + rot_mat[6][3] = -0.00368; + rot_mat[6][4] = -0.00438; + rot_mat[6][5] = -0.00508; + rot_mat[6][6] = 0.99422; + rot_mat[6][7] = -0.00647; + rot_mat[7][0] = 0.04969; + rot_mat[7][1] = 0.10905; + rot_mat[7][2] = -0.00329; + rot_mat[7][3] = -0.00408; + rot_mat[7][4] = -0.00488; + rot_mat[7][5] = -0.00568; + rot_mat[7][6] = -0.00647; + rot_mat[7][7] = 0.99273; + + // Now compute the expected values by hand using the transformation above + double val1 = 0.; + double val2 = 0.; + for (auto i = 0; i < 
rot_mat.size1(); i++) + { + val1 += psiM_bare[0][i] * rot_mat[i][0]; + val2 += psiM_bare[1][i] * rot_mat[i][0]; + } + + // value + CHECK(std::real(psiM_rot[0][0]) == ValueApprox(val1)); + CHECK(std::real(psiM_rot[1][0]) == ValueApprox(val2)); + + std::vector grad1(3); + std::vector grad2(3); + for (auto j = 0; j < grad1.size(); j++) + { + for (auto i = 0; i < rot_mat.size1(); i++) + { + grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0]; + grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0]; + } + } + + // grad + CHECK(dpsiM_rot[0][0][0] == ValueApprox(grad1[0]).epsilon(0.0001)); + CHECK(dpsiM_rot[0][0][1] == ValueApprox(grad1[1]).epsilon(0.0001)); + CHECK(dpsiM_rot[0][0][2] == ValueApprox(grad1[2]).epsilon(0.0001)); + CHECK(dpsiM_rot[1][0][0] == ValueApprox(grad2[0]).epsilon(0.0001)); + CHECK(dpsiM_rot[1][0][1] == ValueApprox(grad2[1]).epsilon(0.0001)); + CHECK(dpsiM_rot[1][0][2] == ValueApprox(grad2[2]).epsilon(0.0001)); + + double lap1 = 0.; + double lap2 = 0.; + for (auto i = 0; i < rot_mat.size1(); i++) + { + lap1 += d2psiM_bare[0][i] * rot_mat[i][0]; + lap2 += d2psiM_bare[1][i] * rot_mat[i][0]; + } + + // Lapl + CHECK(std::real(d2psiM_rot[0][0]) == ValueApprox(lap1).epsilon(0.0001)); + CHECK(std::real(d2psiM_rot[1][0]) == ValueApprox(lap2).epsilon(0.0001)); + +#endif +} + +TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices", "[wavefunction][template]", TestTypeList) +{ + // No active-active or virtual-virtual rotations + // Only active-virtual + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + CHECK(rot_ind.size() == 2); + + // Full rotation contains all rotations + // Size should be number of pairs of orbitals: nmo*(nmo-1)/2 + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + CHECK(full_rot_ind.size() == 3); + + nel = 2; + typename RotatedSPOsT::RotationIndices rot_ind2; + 
RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind2); + CHECK(rot_ind2.size() == 2); + + typename RotatedSPOsT::RotationIndices full_rot_ind2; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind2); + CHECK(full_rot_ind2.size() == 3); + + nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind3); + CHECK(rot_ind3.size() == 4); + + typename RotatedSPOsT::RotationIndices full_rot_ind3; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind3); + CHECK(full_rot_ind3.size() == 6); +} + +TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", "[wavefunction][template]", TestTypeList) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 1; + int nmo = 3; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + + ValueMatrix m3(nmo, nmo); + m3 = ValueType(0); + std::vector params = {0.1, 0.2}; + + RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params, m3); + + // clang-format off + std::vector expected_data = { 0.0, -0.1, -0.2, + 0.1, 0.0, 0.0, + 0.2, 0.0, 0.0 }; + // clang-format on + + ValueMatrix expected_m3(expected_data.data(), 3, 3); + + CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true); + CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); } + + std::vector params_out(2); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(rot_ind, m3, params_out); + CHECK(params_out[0] == ValueApprox(0.1)); + CHECK(params_out[1] == ValueApprox(0.2)); +} + +// Expected values of the matrix exponential come from gen_matrix_ops.py +TEMPLATE_LIST_TEST_CASE("RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + std::vector::ValueType> mat1_data = {0.0}; + typename 
SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + RotatedSPOsT::exponentiate_antisym_matrix(m1); + // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + CHECK(m1(0, 0) == ValueApprox(1.0)); + + // clang-format off + std::vector::ValueType> mat2_data = { 0.0, -0.1, + 0.1, 0.0 }; + // clang-format on + + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + RotatedSPOsT::exponentiate_antisym_matrix(m2); + + // clang-format off + std::vector expected_rot2 = { 0.995004165278026, -0.0998334166468282, + 0.0998334166468282, 0.995004165278026 }; + // clang-format on + + ValueMatrix expected_m2(expected_rot2.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true); + CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } + + // clang-format off + std::vector m3_input_data = { 0.0, -0.3, -0.1, + 0.3, 0.0, -0.2, + 0.1, 0.2, 0.0 }; + + + std::vector expected_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, + 0.283164960565074, 0.935754803277919, -0.210191705950743, + 0.127334574917630, 0.180540076694398, 0.975290308953046 }; + + // clang-format on + + ValueMatrix m3(m3_input_data.data(), 3, 3); + ValueMatrix expected_m3(expected_rot3.data(), 3, 3); + + RotatedSPOsT::exponentiate_antisym_matrix(m3); + + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true); + CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } +} + +TEMPLATE_LIST_TEST_CASE("RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + std::vector::ValueType> mat1_data = {1.0}; + typename SPOSetT::ValueMatrix m1(mat1_data.data(), 1, 1); + typename SPOSetT::ValueMatrix out_m1(1, 1); + RotatedSPOsT::log_antisym_matrix(m1, out_m1); + // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0) + CHECK(out_m1(0, 0) == 
ValueApprox(0.0)); + + // clang-format off + std::vector start_rot2 = { 0.995004165278026, -0.0998334166468282, + 0.0998334166468282, 0.995004165278026 }; + + std::vector::ValueType> mat2_data = { 0.0, -0.1, + 0.1, 0.0 }; + // clang-format on + + ValueMatrix rot_m2(start_rot2.data(), 2, 2); + ValueMatrix out_m2(2, 2); + RotatedSPOsT::log_antisym_matrix(rot_m2, out_m2); + + typename SPOSetT::ValueMatrix m2(mat2_data.data(), 2, 2); + CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true); + CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } + + // clang-format off + std::vector start_rot3 = { 0.950580617906092, -0.302932713402637, -0.0680313164049401, + 0.283164960565074, 0.935754803277919, -0.210191705950743, + 0.127334574917630, 0.180540076694398, 0.975290308953046 }; + + std::vector m3_input_data = { 0.0, -0.3, -0.1, + 0.3, 0.0, -0.2, + 0.1, 0.2, 0.0 }; + // clang-format on + ValueMatrix rot_m3(start_rot3.data(), 3, 3); + ValueMatrix out_m3(3, 3); + RotatedSPOsT::log_antisym_matrix(rot_m3, out_m3); + + typename SPOSetT::ValueMatrix m3(m3_input_data.data(), 3, 3); + CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true); + CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); } +} + +// Test round trip A -> exp(A) -> log(exp(A)) +// The log is multi-valued so this test may fail if the rotation parameters are +// too large. 
The exponentials will be the same, though +// exp(log(exp(A))) == exp(A) +TEMPLATE_LIST_TEST_CASE("RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + typename RotatedSPOsT::RotationIndices rot_ind; + int nel = 2; + int nmo = 4; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + + ValueMatrix rot_m4(nmo, nmo); + rot_m4 = ValueType(0); + + std::vector params4 = {-1.1, 1.5, 0.2, -0.15}; + + RotatedSPOsT::constructAntiSymmetricMatrix(rot_ind, params4, rot_m4); + ValueMatrix orig_rot_m4 = rot_m4; + ValueMatrix out_m4(nmo, nmo); + + RotatedSPOsT::exponentiate_antisym_matrix(rot_m4); + + RotatedSPOsT::log_antisym_matrix(rot_m4, out_m4); + + CheckMatrixResult check_matrix_result4 = checkMatrix(out_m4, orig_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } + + std::vector params4out(4); + RotatedSPOsT::extractParamsFromAntiSymmetricMatrix(rot_ind, out_m4, params4out); + for (int i = 0; i < params4.size(); i++) + { + CHECK(params4[i] == ValueApprox(params4out[i])); + } +} + +TEMPLATE_LIST_TEST_CASE("RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList) +{ + using RealType = typename OrbitalSetTraits::RealType; + Communicate* c = OHMMS::Controller; + + typename ParticleSetT::ParticleLayout lattice; + lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142, + 0.00000000, 0.00000000, 0.00000000, 6.78114995}; + + ParticleSetPoolT ptcl = ParticleSetPoolT(c); + ptcl.setSimulationCell(lattice); + auto ions_uptr = std::make_unique>(ptcl.getSimulationCell()); + auto elec_uptr = std::make_unique>(ptcl.getSimulationCell()); + ParticleSetT& ions(*ions_uptr); + ParticleSetT& elec(*elec_uptr); + + ions.setName("ion"); + ptcl.addParticleSet(std::move(ions_uptr)); + ions.create({1}); + ions.R[0] = {0.0, 0.0, 0.0}; + + elec.setName("elec"); + 
ptcl.addParticleSet(std::move(elec_uptr)); + elec.create({1}); + elec.R[0] = {0.0, 0.0, 0.0}; + + SpeciesSet& tspecies = elec.getSpeciesSet(); + int upIdx = tspecies.addSpecies("u"); + int chargeIdx = tspecies.addAttribute("charge"); + tspecies(chargeIdx, upIdx) = -1; + + // Add the attribute save_coefs="yes" to the sposet_builder tag to generate + // the spline file for use in eval_bspline_spo.py + + const char* particles = R"( + + + +)"; + + Libxml2Document doc; + bool okay = doc.parseFromString(particles); + REQUIRE(okay); + + xmlNodePtr root = doc.getRoot(); + + xmlNodePtr sposet_builder = xmlFirstElementChild(root); + xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder); + + EinsplineSetBuilderT einSet(elec, ptcl.getPool(), c, sposet_builder); + auto spo = einSet.createSPOSetFromXML(sposet_ptr); + REQUIRE(spo); + + spo->storeParamsBeforeRotation(); + auto rot_spo = std::make_unique>("one_rotated_set", std::move(spo)); + + // Sanity check for orbs. Expect 1 electron, 2 orbitals + const auto orbitalsetsize = rot_spo->getOrbitalSetSize(); + REQUIRE(orbitalsetsize == 2); + + rot_spo->buildOptVariables(elec.R.size()); + + typename SPOSetT::ValueMatrix psiM_bare(elec.R.size(), orbitalsetsize); + typename SPOSetT::GradMatrix dpsiM_bare(elec.R.size(), orbitalsetsize); + typename SPOSetT::ValueMatrix d2psiM_bare(elec.R.size(), orbitalsetsize); + rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + + // Values generated from eval_bspline_spo.py, the + // generate_point_values_hcpBe function + CHECK(std::real(psiM_bare[0][0]) == ValueApprox(0.210221765375514)); + CHECK(std::real(psiM_bare[0][1]) == ValueApprox(-2.984345024542937e-06)); + + CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox(5.303848362116568)); + + OptVariablesTypeT opt_vars; + rot_spo->checkInVariablesExclusive(opt_vars); + opt_vars.resetIndex(); + rot_spo->checkOutVariables(opt_vars); + rot_spo->resetParametersExclusive(opt_vars); + + using ValueType = 
TestType; + Vector dlogpsi(1); + Vector dhpsioverpsi(1); + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); + + CHECK(dlogpsi[0] == ValueApprox(-1.41961753e-05)); + CHECK(dhpsioverpsi[0] == ValueApprox(-0.00060853)); + + std::vector params = {0.1}; + rot_spo->apply_rotation(params, false); + + rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare); + CHECK(std::real(psiM_bare[0][0]) == ValueApprox(0.20917123424337608)); + CHECK(std::real(psiM_bare[0][1]) == ValueApprox(-0.02099012652669549)); + + CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox(5.277362065087747)); + + dlogpsi[0] = 0.0; + dhpsioverpsi[0] = 0.0; + + rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1); + CHECK(dlogpsi[0] == ValueApprox(-0.10034901119468914)); + CHECK(dhpsioverpsi[0] == ValueApprox(32.96939041498753)); +} + +// Test construction of delta rotation +TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction][template]", TestTypeList) +{ + using ValueType = typename SPOSetT::ValueType; + using ValueMatrix = typename SPOSetT::ValueMatrix; + + int nel = 2; + int nmo = 4; + typename RotatedSPOsT::RotationIndices rot_ind; + RotatedSPOsT::createRotationIndices(nel, nmo, rot_ind); + typename RotatedSPOsT::RotationIndices full_rot_ind; + RotatedSPOsT::createRotationIndicesFull(nel, nmo, full_rot_ind); + // rot_ind size is 4 and full rot_ind size is 6 + + ValueMatrix rot_m4(nmo, nmo); + rot_m4 = ValueType(0); + + // When comparing with gen_matrix_ops.py, be aware of the order of indices + // in full_rot + // rot_ind is (0,2) (0,3) (1,2) (1,3) + // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3) + // The extra indices go at the back + std::vector old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05}; + std::vector delta_params = {0.1, 0.3, 0.2, -0.1}; + std::vector new_params(6); + + RotatedSPOsT::constructDeltaRotation(delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4); + + // 
clang-format off + std::vector rot_data4 = + { -0.371126931484737, 0.491586564957393, -0.784780958819798, 0.0687480658200083, + -0.373372784561548, 0.66111547793048, 0.610450337985578, 0.225542620014052, + 0.751270334458895, 0.566737323353515, -0.0297901110611425, -0.336918744155143, + 0.398058348785074, 0.00881931472604944, -0.102867783149713, 0.911531672428406 }; + // clang-format on + + ValueMatrix new_rot_m4(rot_data4.data(), 4, 4); + + CheckMatrixResult check_matrix_result4 = checkMatrix(rot_m4, new_rot_m4, true); + CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } + + // Reminder: Ordering! + std::vector expected_new_param = {1.6813965019790489, 0.3623564254653294, -0.05486544454559908, + -0.20574472941408453, -0.9542513302873077, 0.27497788909911774}; + for (int i = 0; i < new_params.size(); i++) + CHECK(new_params[i] == ValueApprox(expected_new_param[i])); + + // Rotated back to original position + + std::vector new_params2(6); + std::vector reverse_delta_params = {-0.1, -0.3, -0.2, 0.1}; + RotatedSPOsT::constructDeltaRotation(reverse_delta_params, new_params, rot_ind, full_rot_ind, new_params2, + rot_m4); + for (int i = 0; i < new_params2.size(); i++) + CHECK(new_params2[i] == ValueApprox(old_params[i])); +} + +// Test using global rotation +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction][template]", TestTypeList) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); 
+ rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto& full_var = testing::getMyVarsFull(rot2); + CHECK(full_var[0] == ValueApprox(vs[0])); + CHECK(full_var[1] == ValueApprox(vs[1])); + CHECK(full_var[2] == ValueApprox(vs[2])); + CHECK(full_var[3] == ValueApprox(vs[3])); + CHECK(full_var[4] == ValueApprox(0.0)); + CHECK(full_var[5] == ValueApprox(0.0)); +} + +// Test using history list. +TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction][template]", TestTypeList) +{ + auto fake_spo = std::make_unique>(); + fake_spo->setOrbitalSetSize(4); + RotatedSPOsT rot("fake_rot", std::move(fake_spo)); + rot.set_use_global_rotation(false); + int nel = 2; + rot.buildOptVariables(nel); + + optimize::VariableSetT vs; + rot.checkInVariablesExclusive(vs); + vs[0] = 0.1; + vs[1] = 0.15; + vs[2] = 0.2; + vs[3] = 0.25; + rot.resetParametersExclusive(vs); + + { + hdf_archive hout; + vs.writeToHDF("rot_vp_hist.h5", hout); + + rot.writeVariationalParameters(hout); + } + + auto fake_spo2 = std::make_unique>(); + fake_spo2->setOrbitalSetSize(4); + + RotatedSPOsT rot2("fake_rot", std::move(fake_spo2)); + rot2.buildOptVariables(nel); + + optimize::VariableSetT vs2; + rot2.checkInVariablesExclusive(vs2); + + hdf_archive hin; + vs2.readFromHDF("rot_vp_hist.h5", hin); + rot2.readVariationalParameters(hin); + + auto& var = testing::getMyVars(rot2); + CHECK(var[0] == ValueApprox(vs[0])); + CHECK(var[1] == ValueApprox(vs[1])); + CHECK(var[2] == ValueApprox(vs[2])); + CHECK(var[3] == ValueApprox(vs[3])); + + auto hist = testing::getHistoryParams(rot2); + REQUIRE(hist.size() == 1); + REQUIRE(hist[0].size() == 4); +} + 
+template +class DummySPOSetWithoutMWT : public SPOSetT +{ +public: + using ValueVector = typename SPOSetT::ValueVector; + using ValueMatrix = typename SPOSetT::ValueMatrix; + using GradVector = typename SPOSetT::GradVector; + using GradMatrix = typename SPOSetT::GradMatrix; + + DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT(my_name) {} + void setOrbitalSetSize(int norbs) override {} + void evaluateValue(const ParticleSetT& P, int iat, typename SPOSetT::ValueVector& psi) override + { + assert(psi.size() == 3); + psi[0] = 123; + psi[1] = 456; + psi[2] = 789; + } + void evaluateVGL(const ParticleSetT& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override + {} + void evaluate_notranspose(const ParticleSetT& P, + int first, + int last, + ValueMatrix& logdet, + GradMatrix& dlogdet, + ValueMatrix& d2logdet) override + {} + std::string getClassName() const override { return this->my_name_; } +}; + +template +class DummySPOSetWithMWT : public DummySPOSetWithoutMWT +{ +public: + using ValueVector = typename DummySPOSetWithoutMWT::ValueVector; + + DummySPOSetWithMWT(const std::string& my_name) : DummySPOSetWithoutMWT(my_name) {} + void mw_evaluateValue(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& P_list, + int iat, + const RefVector& psi_v_list) const override + { + for (auto& psi : psi_v_list) + { + assert(psi.get().size() == 3); + psi.get()[0] = 321; + psi.get()[1] = 654; + psi.get()[2] = 987; + } + } +}; + +TEMPLATE_LIST_TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList) +{ + // checking that mw_ API works in RotatedSPOs and is not defaulting to + // SPOSet default implementation + { + // First check calling the mw_ APIs for RotatedSPOs, for which the + // underlying implementation just calls the underlying SPOSet mw_ API + // In the case that the underlying SPOSet doesn't specialize the mw_ + // API, the underlying SPOSet will fall back to the default SPOSet mw_, + // which is just a loop 
over the single walker API. + RotatedSPOsT rot_spo0("rotated0", std::make_unique>("no mw 0")); + RotatedSPOsT rot_spo1("rotated1", std::make_unique>("no mw 1")); + RefVectorWithLeader> spo_list(rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock(spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list(elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) + { + CHECK(psi_v_list[iw].get()[0] == ValueApprox(123)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(456)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(789)); + } + } + { + // In the case that the underlying SPOSet DOES have mw_ specializations, + // we want to make sure that RotatedSPOs are triggering that + // appropriately This will mean that the underlying SPOSets will do the + // appropriate offloading To check this, DummySPOSetWithMW has an + // explicit mw_evaluateValue which sets different values than what gets + // set in evaluateValue. 
By doing this, we are ensuring that + // RotatedSPOs->mw_evaluaeValue is calling the specialization in the + // underlying SPO and not using the default SPOSet implementation which + // loops over single walker APIs (which have different values enforced + // in + // DummySPOSetWithoutMW + + RotatedSPOsT rot_spo0("rotated0", std::make_unique>("mw 0")); + RotatedSPOsT rot_spo1("rotated1", std::make_unique>("mw 1")); + RefVectorWithLeader> spo_list(rot_spo0, {rot_spo0, rot_spo1}); + + ResourceCollection spo_res("test_rot_res"); + rot_spo0.createResource(spo_res); + ResourceCollectionTeamLock> mw_sposet_lock(spo_res, spo_list); + + const SimulationCellT simulation_cell; + ParticleSetT elec0(simulation_cell); + ParticleSetT elec1(simulation_cell); + RefVectorWithLeader> p_list(elec0, {elec0, elec1}); + + typename SPOSetT::ValueVector psi0(3); + typename SPOSetT::ValueVector psi1(3); + RefVector::ValueVector> psi_v_list{psi0, psi1}; + + rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list); + for (int iw = 0; iw < spo_list.size(); iw++) + { + CHECK(psi_v_list[iw].get()[0] == ValueApprox(321)); + CHECK(psi_v_list[iw].get()[1] == ValueApprox(654)); + CHECK(psi_v_list[iw].get()[2] == ValueApprox(987)); + } + } +} + +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp index ed969b392c..463b0e8374 100644 --- a/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp +++ b/src/QMCWaveFunctions/tests/test_TrialWaveFunction_He.cpp @@ -24,6 +24,7 @@ #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "Utilities/RuntimeOptions.h" #include +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp b/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp index 1eebcabd0b..12c3208437 100644 --- a/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp +++ b/src/QMCWaveFunctions/tests/test_cartesian_ao.cpp @@ -19,6 
+19,7 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_einset.cpp b/src/QMCWaveFunctions/tests/test_einset.cpp index 8dd00c2621..7cc6a6a8f0 100644 --- a/src/QMCWaveFunctions/tests/test_einset.cpp +++ b/src/QMCWaveFunctions/tests/test_einset.cpp @@ -19,6 +19,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "BsplineFactory/EinsplineSetBuilder.h" #include "BsplineFactory/EinsplineSpinorSetBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include #include diff --git a/src/QMCWaveFunctions/tests/test_einset_NiO_a16.cpp b/src/QMCWaveFunctions/tests/test_einset_NiO_a16.cpp index 6642f86761..094276211b 100644 --- a/src/QMCWaveFunctions/tests/test_einset_NiO_a16.cpp +++ b/src/QMCWaveFunctions/tests/test_einset_NiO_a16.cpp @@ -19,6 +19,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "BsplineFactory/EinsplineSetBuilder.h" #include "BsplineFactory/EinsplineSpinorSetBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include #include diff --git a/src/QMCWaveFunctions/tests/test_einset_spinor.cpp b/src/QMCWaveFunctions/tests/test_einset_spinor.cpp index 83693e52b9..6fb31c1867 100644 --- a/src/QMCWaveFunctions/tests/test_einset_spinor.cpp +++ b/src/QMCWaveFunctions/tests/test_einset_spinor.cpp @@ -21,7 +21,7 @@ #include "QMCWaveFunctions/SPOSetBuilderFactory.h" #include "Utilities/ResourceCollection.h" #include "QMCWaveFunctions/SpinorSet.h" - +#include "QMCWaveFunctions/SPOSet.h" #include #include #include @@ -531,7 +531,7 @@ TEST_CASE("Einspline SpinorSet from HDF", "[wavefunction]") elec_.mw_makeMove(p_list, iat, displs); std::vector accept = {true, true}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } elec_.mw_update(p_list); @@ -612,7 +612,7 @@ TEST_CASE("Einspline SpinorSet from HDF", 
"[wavefunction]") CHECK(mw_dspin[1][2] == ComplexApprox(dspsiM_ref[(iat + 1) % 3][2]).epsilon(h)); std::vector accept = {false, false}; - elec_.mw_accept_rejectMove(p_list, iat, accept); + elec_.mw_accept_rejectMoveT(p_list, iat, accept); } } diff --git a/src/QMCWaveFunctions/tests/test_example_he.cpp b/src/QMCWaveFunctions/tests/test_example_he.cpp index a8f980f63a..5ab56f0d71 100644 --- a/src/QMCWaveFunctions/tests/test_example_he.cpp +++ b/src/QMCWaveFunctions/tests/test_example_he.cpp @@ -19,6 +19,7 @@ #include "QMCWaveFunctions/WaveFunctionFactory.h" #include "QMCWaveFunctions/ExampleHeComponent.h" #include "Utilities/RuntimeOptions.h" +#include "QMCWaveFunctions/VariableSet.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/tests/test_hybridrep.cpp b/src/QMCWaveFunctions/tests/test_hybridrep.cpp index 956756fc48..2106553a83 100644 --- a/src/QMCWaveFunctions/tests/test_hybridrep.cpp +++ b/src/QMCWaveFunctions/tests/test_hybridrep.cpp @@ -24,6 +24,7 @@ #include "QMCWaveFunctions/WaveFunctionComponent.h" #include "BsplineFactory/EinsplineSetBuilder.h" #include "BsplineFactory/EinsplineSpinorSetBuilder.h" +#include "QMCWaveFunctions/SPOSet.h" #include using std::string; diff --git a/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp b/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp index e5f7f68364..d880d36b86 100644 --- a/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp +++ b/src/QMCWaveFunctions/tests/test_pyscf_complex_MO.cpp @@ -26,7 +26,7 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalBuilder.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" - +#include "QMCWaveFunctions/SPOSet.h" namespace qmcplusplus { void test_C_diamond() diff --git a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp index 1c7e02bff9..166df18c29 100644 --- a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp +++ b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp @@ -21,8 
+21,8 @@ #include "Numerics/GaussianBasisSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" -#include "QMCWaveFunctions/LCAO/CuspCorrectionConstruction.h" - +#include "QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h" +#include "QMCWaveFunctions/SPOSet.h" #include "QMCWaveFunctions/SPOSetBuilderFactory.h" namespace qmcplusplus @@ -33,12 +33,13 @@ TEST_CASE("readCuspInfo", "[wavefunction]") using GridType = OneDimGridBase; - Matrix info; + Matrix> info; int num_center = 3; int orbital_set_size = 7; info.resize(num_center, orbital_set_size); - bool okay = readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); + bool okay = + CuspCorrectionConstructionT::readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); REQUIRE(okay); // N @@ -61,7 +62,6 @@ TEST_CASE("readCuspInfo", "[wavefunction]") CHECK(info(2, 4).alpha[4] == Approx(-404.733151049101)); // a5 } - TEST_CASE("applyCuspInfo", "[wavefunction]") { Communicate* c = OHMMS::Controller; @@ -138,16 +138,17 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") using RealType = QMCTraits::RealType; - splitPhiEta(center_idx, corrCenter, phi, eta); + CuspCorrectionConstructionT::splitPhiEta(center_idx, corrCenter, phi, eta); // 1S orbital on N CHECK((*phi.C)(0, 0) == Approx(1.00180500)); CHECK((*eta.C)(0, 0) == Approx(0.0)); int orbital_set_size = 7; - Matrix info; + Matrix> info; info.resize(num_center, orbital_set_size); - okay = readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); + okay = + CuspCorrectionConstructionT::readCuspInfo("hcn_downdet.cuspInfo.xml", "downdet", orbital_set_size, info); REQUIRE(okay); Vector xgrid; @@ -162,7 +163,8 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") rad_orb.resize(ngrid); int mo_idx = 0; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, 
mo_idx)); // Comparisons generated from gen_cusp_corr.py // Center 0 MO 0 rc = 0.07691307008 @@ -179,7 +181,8 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") mo_idx = 1; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, mo_idx)); // Center 0 MO 1 rc = 0.060909477888 CHECK(rad_orb[0] == Approx(-0.0099816961)); // x = 0.012 @@ -202,14 +205,15 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") // C is second atom center_idx = 1; - splitPhiEta(center_idx, corrCenter, phi, eta); + CuspCorrectionConstructionT::splitPhiEta(center_idx, corrCenter, phi, eta); // 1S orbital on N CHECK((*phi.C)(0, 0) == Approx(0.0)); CHECK((*eta.C)(0, 0) == Approx(1.00180500)); mo_idx = 0; - computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, info(center_idx, mo_idx)); + CuspCorrectionConstructionT::computeRadialPhiBar(&elec, &ions, mo_idx, center_idx, &phi, xgrid, rad_orb, + info(center_idx, mo_idx)); // Center 1 MO 0 rc = 0.105 CHECK(rad_orb[0] == Approx(0.0017535517)); // x = 0.012 @@ -224,7 +228,7 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") CHECK(rad_orb[9] == Approx(0.0010837868)); // x = 0.12 - removeSTypeOrbitals(corrCenter, lcob); + CuspCorrectionConstructionT::removeSTypeOrbitals(corrCenter, lcob); CHECK((*lcob.C)(0, 0) == Approx(0.0)); CHECK((*lcob.C)(0, 1) == Approx(0.0)); @@ -551,7 +555,7 @@ TEST_CASE("Ethanol MO with cusp", "[wavefunction]") TEST_CASE("broadcastCuspInfo", "[wavefunction]") { Communicate* c = OHMMS::Controller; - CuspCorrectionParameters cp; + CuspCorrectionParametersT cp; int root = 0; if (c->rank() == root) { @@ -566,7 +570,7 @@ TEST_CASE("broadcastCuspInfo", "[wavefunction]") cp.redo = 1; } - broadcastCuspInfo(cp, *c, root); + CuspCorrectionConstructionT::broadcastCuspInfo(cp, *c, root); CHECK(cp.Rc == Approx(2.0)); CHECK(cp.C == Approx(3.0)); diff --git 
a/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp b/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp index b0741c2074..ac0f6f960c 100644 --- a/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp +++ b/src/QMCWaveFunctions/tests/test_spline_applyrotation.cpp @@ -20,6 +20,7 @@ #include "BsplineFactory/EinsplineSpinorSetBuilder.h" #include "QMCWaveFunctions/BsplineFactory/SplineC2C.h" #include "Utilities/for_testing/checkMatrix.hpp" +#include "QMCWaveFunctions/SPOSet.h" #include #include diff --git a/src/mpi/mpi_datatype.h b/src/mpi/mpi_datatype.h index 3750fba976..8f3c58e994 100644 --- a/src/mpi/mpi_datatype.h +++ b/src/mpi/mpi_datatype.h @@ -13,6 +13,8 @@ #ifndef QMCPLUSPLUS_MPI_DATATYPEDEFINE_H #define QMCPLUSPLUS_MPI_DATATYPEDEFINE_H +#include "Message/Communicate.h" + #if defined(HAVE_MPI) #include #else diff --git a/src/spline/test_bspline.h b/src/spline/test_bspline.h index e61fb8bdad..a480b1815e 100644 --- a/src/spline/test_bspline.h +++ b/src/spline/test_bspline.h @@ -86,7 +86,6 @@ void test_bspline(ParticleSet& TargetPtcl, SPE1& a, SPE2& b) { int N = a.OrbitalSetSize; SPOSet::RealType eps = static_cast(numeric_limits::epsilon()); - //SPOSet::RealType eps=1e-6; SPOSet::ValueVector psi_0(N); SPOSet::ValueVector psi_1(N); SPOSet::GradVector dpsi_0(N); diff --git a/src/type_traits/complex_help.hpp b/src/type_traits/complex_help.hpp index 79e0e920a4..76ebeddcbb 100644 --- a/src/type_traits/complex_help.hpp +++ b/src/type_traits/complex_help.hpp @@ -12,6 +12,9 @@ #ifndef QMCPLUSPLUS_COMPLEX_HELP_HPP #define QMCPLUSPLUS_COMPLEX_HELP_HPP +#include +#include + namespace qmcplusplus { template @@ -35,6 +38,22 @@ struct RealAlias_impl> { using value_type = T; }; template struct RealAlias_impl> { using value_type = typename T::value_type; }; +template +struct FullPrec_impl +{}; + +template +struct FullPrec_impl> +{ + using value_type = double; +}; + +template +struct FullPrec_impl> +{ + using value_type = std::complex; +}; + /** If you have a 
function templated on a value that can be real or complex * and you need to get the base Real type if its complex or just the real. * @@ -44,6 +63,9 @@ struct RealAlias_impl> { using value_type = typename T::value_ty template using RealAlias = typename RealAlias_impl::value_type; +template +using FullPrec = typename FullPrec_impl::value_type; + ///real part of a scalar. Cannot be replaced by std::real due to AFQMC specific needs. inline float real(const float& c) { return c; } inline double real(const double& c) { return c; } From 31a11f825aca0cc267eeda2e7b0ce9ae6ed5f9fd Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 7 Nov 2023 16:34:13 -0500 Subject: [PATCH 2/4] Remove test differences and fix build Signed-off-by: Steven Hahn --- .../LCAO/LCAOrbitalBuilderT.cpp | 2 +- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp | 56 ++++++++++++++++--- src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h | 5 ++ .../tests/test_LCAO_diamondC_2x1x1.cpp | 8 +-- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp index 84b2b3d9a8..bdfee067d1 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp @@ -468,6 +468,7 @@ typename LCAOrbitalBuilderT::BasisSet_t* LCAOrbitalBuilderT::createBasisSe return mBasisSet; } #ifndef QMC_COMPLEX +#ifndef MIXED_PRECISION template<> std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorrection( xmlNodePtr cur, @@ -484,7 +485,6 @@ std::unique_ptr> LCAOrbitalBuilderT::createWithCuspCorre lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); sposet = std::move(lcwc); } -#ifndef MIXED_PRECISION // Create a temporary particle set to use for cusp initialization. // The particle coordinates left at the end are unsuitable for further // computations. 
The coordinates get set to nuclear positions, which diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp index 99c91ead30..7ee784d3a6 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp @@ -519,6 +519,41 @@ void LCAOrbitalSetT::mw_evaluateVGLImplGEMM(const RefVectorWithLeader +void LCAOrbitalSetT::mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + OffloadMWVArray& vp_phi_v) const +{ + assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + //const size_t nw = spo_list.size(); + auto& vp_basis_v_mw = spo_leader.mw_mem_handle_.getResource().vp_basis_v_mw; + //Splatter basis_v + const size_t nVPs = vp_phi_v.size(0); + vp_basis_v_mw.resize(nVPs, BasisSetSize); + + auto basis_list = spo_leader.extractBasisRefList(spo_list); + myBasisSet->mw_evaluateValueVPs(basis_list, vp_list, vp_basis_v_mw); + vp_basis_v_mw.updateFrom(); // TODO: remove this when gemm is implemented + + if (Identity) + { + std::copy_n(vp_basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nVPs, vp_phi_v.data_at(0, 0)); + } + else + { + const size_t requested_orb_size = vp_phi_v.size(1); + assert(requested_orb_size <= this->OrbitalSetSize); + ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize); + BLAS::gemm('T', 'N', + requested_orb_size, // MOs + nVPs, // walkers * Virtual Particles + BasisSetSize, // AOs + 1, C_partial_view.data(), BasisSetSize, vp_basis_v_mw.data(), BasisSetSize, 0, vp_phi_v.data(), + requested_orb_size); + } +} + template void LCAOrbitalSetT::mw_evaluateValue(const RefVectorWithLeader>& spo_list, const RefVectorWithLeader>& P_list, @@ -579,15 +614,20 @@ void LCAOrbitalSetT::mw_evaluateDetRatios(const RefVectorWithLeader& invRow_ptr_list, std::vector>& ratios_list) const { - const size_t nw = spo_list.size(); - for (size_t iw = 0; iw < nw; iw++) - { + 
assert(this == &spo_list.getLeader()); + auto& spo_leader = spo_list.template getCastedLeader>(); + auto& vp_phi_v = spo_leader.mw_mem_handle_.getResource().vp_phi_v; + + const size_t nVPs = VirtualParticleSetT::countVPs(vp_list); + const size_t requested_orb_size = psi_list[0].get().size(); + vp_phi_v.resize(nVPs, requested_orb_size); + + mw_evaluateValueVPsImplGEMM(spo_list, vp_list, vp_phi_v); + + size_t index = 0; + for (size_t iw = 0; iw < vp_list.size(); iw++) for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) - { - spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); - ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size()); - } - } + ratios_list[iw][iat] = simd::dot(vp_phi_v.data_at(index++, 0), invRow_ptr_list[iw], requested_orb_size); } template diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h index 24e979595c..a569e57e5e 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h @@ -358,6 +358,11 @@ class LCAOrbitalSetT : public SPOSetT int iat, OffloadMWVArray& phi_v) const; + /// packed walker GEMM implementation with multi virtual particle sets + void mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader>& spo_list, + const RefVectorWithLeader>& vp_list, + OffloadMWVArray& phi_v) const; + /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet RefVectorWithLeader extractBasisRefList(const RefVectorWithLeader>& spo_list) const; diff --git a/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp b/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp index 427a41b88f..f67b775b49 100644 --- a/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp +++ b/src/QMCWaveFunctions/tests/test_LCAO_diamondC_2x1x1.cpp @@ -23,7 +23,8 @@ #include "DistanceTable.h" #include "QMCWaveFunctions/SPOSet.h" #include "QMCWaveFunctions/LCAO/LCAOrbitalSet.h" -#include + +#include 
#include #include @@ -338,10 +339,9 @@ void test_LCAO_DiamondC_2x1x1_real() ratios_list[iw].resize(nvp_list[iw]); // just need dummy refvec with correct size - SPOSet::ValueVector tmp_psi_list(norb), tmp_psi_list_2(norb); + SPOSet::ValueVector tmp_psi_list(norb); spo->mw_evaluateDetRatios(spo_list, RefVectorWithLeader(VP_, {VP_, VP_2}), - RefVector{tmp_psi_list, tmp_psi_list_2}, invRow_ptr_list, - ratios_list); + RefVector{tmp_psi_list}, invRow_ptr_list, ratios_list); std::vector ratios_ref_0(nvp_); std::vector ratios_ref_1(nvp_2); From f91953494109bc608feefaae2ece09b1c4d737ec Mon Sep 17 00:00:00 2001 From: William F Godoy Date: Tue, 7 Nov 2023 19:44:10 -0500 Subject: [PATCH 3/4] Remove PR branch from CI --- .github/workflows/ci-github-actions.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml index 1665274db4..bdd90a687c 100644 --- a/.github/workflows/ci-github-actions.yaml +++ b/.github/workflows/ci-github-actions.yaml @@ -5,12 +5,10 @@ on: branches: - develop - main - - ref-add-SPOSetT pull_request: branches: - develop - main - - ref-add-SPOSetT jobs: linux: From c3298e8188315af7db1ea98e2ca22741fba53b49 Mon Sep 17 00:00:00 2001 From: William F Godoy Date: Wed, 8 Nov 2023 20:44:12 -0500 Subject: [PATCH 4/4] Fix missing const --- src/Particle/ParticleSetT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp index 0f51ace986..760c8ba005 100644 --- a/src/Particle/ParticleSetT.cpp +++ b/src/Particle/ParticleSetT.cpp @@ -1246,7 +1246,7 @@ void ParticleSetT::applyBC(ParticlePos& pos) } template -void ParticleSetT::applyMinimumImage(ParticlePos& pinout) +void ParticleSetT::applyMinimumImage(ParticlePos& pinout) const { if (getLattice().SuperCellEnum == SUPERCELL_OPEN) return;