XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
toroidal_average.hpp
Go to the documentation of this file.
1 #ifndef TOROIDAL_AVERAGE_HPP
2 #define TOROIDAL_AVERAGE_HPP
3 
4 #include "my_mirror_view.hpp"
6 
7 // Does a copy to the MPI memory space if different from the View's memory space
8 template<class T>
10  // No operation if n_planes is 1
11  if(pol_decomp.mpi.n_intpl_ranks==1) return;
12 
13  // Cast to 1D with an unmanaged view for a generic element-wise operation
14  View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D(f.data(), f.size());
15 
16  using MPIHostType = HostType; // Do this MPI_Allreduce on Host; GPU-aware MPI consistently hangs or aborts here
17 
18  // Copy to host if on GPU and GPU-aware MPI is not available
19  auto f_mpi_d = my_mirror_scratch_view(f_1D,MPIHostType());
20  auto f_mpi = my_mirror_view(f_1D, MPIHostType(), f_mpi_d);
21  mirror_copy(f_mpi, f_1D);
22 
23  /*** Sum all planes ***/
24  MPI_Allreduce(MPI_IN_PLACE, f_mpi.data(), f_mpi.size(), MPI_DOUBLE, MPI_SUM, pol_decomp.mpi.intpl_comm);
25 
26  // Copy back
27  mirror_copy(f_1D, f_mpi);
28 
29  /*** Normalize by n planes ***/
30  // Invert first so that the loop has a multiplication rather than a division
31  double inv_n_planes = 1.0/pol_decomp.mpi.n_intpl_ranks;
32 
33  // Do the normalization
34  Kokkos::parallel_for("toroidal_average", Kokkos::RangePolicy<typename T::execution_space>(0, f.size()), KOKKOS_LAMBDA(const int i){
35  f_1D(i) *= inv_n_planes;
36  });
37  Kokkos::fence();
38 }
39 
40 #endif
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
void toroidal_average_in_place(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: toroidal_average.hpp:9
Kokkos::Device< HostExSpace, HostMemSpace > HostType
Definition: space_settings.hpp:56
View< T *, CLayout, Device > my_mirror_scratch_view(const View< T *, CLayout, Device, Kokkos::MemoryTraits< Kokkos::Unmanaged >> &view, Device nd)
Definition: my_mirror_view.hpp:97
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252