Skip to content

Commit

Permalink
Merge branch 'direct_non_local_energy_calculation' into 'master'
Browse files Browse the repository at this point in the history
Direct non-local energy calculation

See merge request npneq/inq!1146
  • Loading branch information
xavierandrade committed Oct 14, 2024
2 parents 6391ba5 + b238f5b commit 1315504
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 8 deletions.
8 changes: 2 additions & 6 deletions src/hamiltonian/energy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,8 @@ namespace hamiltonian {
eigenvalues_ += occ_sum(el.occupations()[iphi], el.eigenvalues()[iphi]);
}

{
CALI_CXX_MARK_SCOPE("energy::calculate::non_local");
auto nl_me = operations::overlap_diagonal_normalized(ham.non_local(phi), phi);
non_local_ += occ_sum(el.occupations()[iphi], nl_me);
}

non_local_ += ham.non_local_energy(phi, el.occupations()[iphi], /*reduce_states = */ false);

if(ham.exchange().enabled()){
CALI_CXX_MARK_SCOPE("energy::calculate::exchange");
auto exchange_me = operations::overlap_diagonal_normalized(ham.exchange()(phi), phi);
Expand Down
47 changes: 45 additions & 2 deletions src/hamiltonian/ks_hamiltonian.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,30 @@ class ks_hamiltonian {
std::unordered_map<std::string, projector_fourier> projectors_fourier_map_;
std::vector<std::unordered_map<std::string, projector_fourier>::iterator> projectors_fourier_;
states::ks_states states_;


#ifdef ENABLE_CUDA
public:
#endif

template <typename OccType, typename ArrayType>
struct occ_sum_func {
OccType occ;
ArrayType arr;

GPU_FUNCTION double operator()(long ip) const {
return occ[ip]*real(arr[ip]);
}
};

template <typename OccType, typename ArrayType>
static double occ_sum(OccType const & occupations, ArrayType const & array) {
CALI_CXX_MARK_FUNCTION;

assert(occupations.size() == array.size());
auto func = occ_sum_func<decltype(begin(occupations)), decltype(begin(array))>{begin(occupations), begin(array)};
return gpu::run(gpu::reduce(array.size()), func);
}

public:

void update_projectors(const basis::real_space & basis, const atomic_potential & pot, systems::ions const & ions){
Expand Down Expand Up @@ -98,7 +121,7 @@ class ks_hamiltonian {
it->second(phi, vnlphi);
}
}

////////////////////////////////////////////////////////////////////////////////////////////

auto non_local(const states::orbital_set<basis::real_space, complex> & phi) const {
Expand Down Expand Up @@ -129,6 +152,26 @@ class ks_hamiltonian {

////////////////////////////////////////////////////////////////////////////////////////////

template <typename Occupations>
auto non_local_energy(states::orbital_set<basis::real_space, complex> const & phi, Occupations const & occupations, bool const reduce_states = true) const {

CALI_CXX_MARK_FUNCTION;

if(non_local_in_fourier_) {

auto nl_me = operations::overlap_diagonal_normalized(non_local(phi), phi);
auto en = occ_sum(occupations, nl_me);
if(reduce_states and phi.set_comm().size() > 1) phi.set_comm().all_reduce_in_place_n(&en, 1, std::plus<>{});
return en;

} else {
return projectors_all_.energy(phi, phi.kpoint() + uniform_vector_potential_, occupations, reduce_states);
}

}

////////////////////////////////////////////////////////////////////////////////////////////

auto operator()(const states::orbital_set<basis::real_space, complex> & phi) const {

CALI_CXX_MARK_SCOPE("hamiltonian_real");
Expand Down
98 changes: 98 additions & 0 deletions src/hamiltonian/projector_all.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,104 @@ class projector_all {
};

////////////////////////////////////////////////////////////////////////////////////////////

template <typename Projections, typename Coeff, typename Occupations>
struct energy_reduction {
Projections proj;
Coeff coe;
Occupations occ;
long spinor_size;

GPU_FUNCTION auto operator()(long ist, long ilm, long iproj) const {
auto ist_spinor = ist%spinor_size;
auto pp = proj[iproj][ilm][ist];
return real(conj(pp)*pp)*coe[iproj][ilm]*occ[ist_spinor];
}

};

////////////////////////////////////////////////////////////////////////////////////////////

template <typename KpointType, typename Occupations>
double energy(states::orbital_set<basis::real_space, complex> const & phi, KpointType const & kpoint, Occupations const & occupations, bool const reduce_states = true) const {

gpu::array<complex, 3> sphere_phi_all({nprojs_, max_sphere_size_, phi.local_set_size()});
gpu::array<complex, 3> projections_all({nprojs_, max_nlm_, phi.local_set_size()}, 0.0);

{ CALI_CXX_MARK_SCOPE("projector::gather");

gpu::run(phi.local_set_size(), max_sphere_size_, nprojs_,
[sgr = begin(sphere_phi_all), gr = begin(phi.hypercubic()), poi = begin(points_), pos = begin(positions_), kpoint] GPU_LAMBDA (auto ist, auto ipoint, auto iproj){
if(poi[iproj][ipoint][0] >= 0){
auto phase = polar(1.0, dot(kpoint, pos[iproj][ipoint]));
sgr[iproj][ipoint][ist] = phase*gr[poi[iproj][ipoint][0]][poi[iproj][ipoint][1]][poi[iproj][ipoint][2]][ist];
} else {
sgr[iproj][ipoint][ist] = complex(0.0, 0.0);
}
});
}

#ifndef ENABLE_CUDA
for(auto iproj = 0; iproj < nprojs_; iproj++){
CALI_CXX_MARK_SCOPE("projector_gemm_1");

if(locally_empty_[iproj]) continue;

namespace blas = boost::multi::blas;
blas::real_doubled(projections_all[iproj]) = blas::gemm(phi.basis().volume_element(), matrices_[iproj], blas::real_doubled(sphere_phi_all[iproj]));
}
#else
if(max_sphere_size_ > 0) {
CALI_CXX_MARK_SCOPE("projector_gemm_1");

const double zero = 0.0;
const double vol = phi.basis().volume_element();

auto status = cublasDgemmStridedBatched(/*cublasHandle_t handle = */ boost::multi::cuda::cublas::context::get_instance().get(),
/*cublasOperation_t transa = */ CUBLAS_OP_N,
/*cublasOperation_t transb = */ CUBLAS_OP_N,
/*int m = */ 2*phi.local_set_size(),
/*int n = */ max_nlm_,
/*int k = */ max_sphere_size_,
/*const double *alpha = */ &vol,
/*const double *A = */ reinterpret_cast<double const *>(raw_pointer_cast(sphere_phi_all.data_elements())),
/*int lda = */ 2*phi.local_set_size(),
/*long long int strideA = */ 2*max_sphere_size_*phi.local_set_size(),
/*const double *B = */ raw_pointer_cast(matrices_.data_elements()),
/*int ldb = */ max_sphere_size_,
/*long long int strideB =*/ max_nlm_*max_sphere_size_,
/*const double *beta = */ &zero,
/*double *C = */ reinterpret_cast<double *>(raw_pointer_cast(projections_all.data_elements())),
/*int ldc = */ 2*phi.local_set_size(),
/*long long int strideC = */ 2*max_nlm_*phi.local_set_size(),
/*int batchCount = */ nprojs_);
gpu::sync();

assert(status == CUBLAS_STATUS_SUCCESS);

}
#endif

if(phi.basis().comm().size() > 1) {
CALI_CXX_MARK_SCOPE("projector_all::energy::reduce_basis");
phi.basis().comm().all_reduce_in_place_n(raw_pointer_cast(projections_all.data_elements()), projections_all.num_elements(), std::plus<>{});
}

auto en = gpu::run(gpu::reduce(phi.local_set_size()), gpu::reduce(max_nlm_), gpu::reduce(nprojs_),
energy_reduction<decltype(begin(projections_all)), decltype(begin(coeff_)), decltype(begin(occupations))>
{begin(projections_all), begin(coeff_), begin(occupations), phi.local_spinor_set_size()});

if(reduce_states and phi.set_comm().size() > 1) {
CALI_CXX_MARK_SCOPE("projector_all::energy::reduce_states");
phi.set_comm().all_reduce_in_place_n(&en, 1, std::plus<>{});
}

return en;

}

////////////////////////////////////////////////////////////////////////////////////////////


template <typename PhiType, typename GPhiType, typename MetricType, typename OccsType>
void force(PhiType & phi, GPhiType const & gphi, MetricType const & metric,
Expand Down

0 comments on commit 1315504

Please sign in to comment.