XGC1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
toroidal_average.hpp
Go to the documentation of this file.
1 #ifndef TOROIDAL_AVERAGE_HPP
2 #define TOROIDAL_AVERAGE_HPP
3 
4 #include "my_mirror_view.hpp"
6 
7 // Does a copy to the MPI memory space if different from the View's memory space
8 template<class T>
10 #ifdef USE_MPI
11  // No operation if n_planes is 1
12  if(pol_decomp.mpi.n_intpl_ranks==1) return;
13 
14  // Cast to 1D with an unmanaged view for a generic element-wise operation
15  View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((double*)(f.data()), f.size());
16 
17  using MPIHostType = HostType; // Do this MPI_Allreduce on Host; GPU-aware MPI consistently hangs or aborts here
18 
19  // Copy to host if on GPU and GPU-aware MPI is not available
20  auto f_mpi_d = my_mirror_scratch_view(f_1D,MPIHostType());
21  auto f_mpi = my_mirror_view(f_1D, MPIHostType(), f_mpi_d);
22  mirror_copy(f_mpi, f_1D);
23 
24  /*** Sum all planes ***/
25  MPI_Allreduce(MPI_IN_PLACE, f_mpi.data(), f_mpi.size(), MPI_DOUBLE, MPI_SUM, pol_decomp.mpi.intpl_comm);
26 
27  // Copy back
28  mirror_copy(f_1D, f_mpi);
29 #endif
30 }
31 
32 // Does a copy to the MPI memory space if different from the View's memory space
33 template<class T>
35 #ifdef USE_MPI
36  // No operation if n_planes is 1
37  if(pol_decomp.mpi.n_intpl_ranks==1) return;
38 
39  // Cast to 1D with an unmanaged view for a generic element-wise operation
40  View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((double*)(f.data()), f.size());
41 
42  using MPIHostType = HostType; // Do this MPI_Allreduce on Host; GPU-aware MPI consistently hangs or aborts here
43 
44  // Copy to host if on GPU and GPU-aware MPI is not available
45  auto f_mpi_d = my_mirror_scratch_view(f_1D,MPIHostType());
46  auto f_mpi = my_mirror_view(f_1D, MPIHostType(), f_mpi_d);
47  mirror_copy(f_mpi, f_1D);
48 
49  /*** Sum all planes ***/
50  MPI_Allreduce(MPI_IN_PLACE, f_mpi.data(), f_mpi.size(), MPI_DOUBLE, MPI_SUM, pol_decomp.mpi.intpl_comm);
51 
52  // Copy back
53  mirror_copy(f_1D, f_mpi);
54 
55  /*** Normalize by n planes ***/
56  // Invert first so that the loop has a multiplication rather than a division
57  double inv_n_planes = 1.0/pol_decomp.mpi.n_intpl_ranks;
58 
59  // Do the normalization
60  Kokkos::parallel_for("toroidal_average", Kokkos::RangePolicy<typename T::execution_space>(0, f.size()), KOKKOS_LAMBDA(const int i){
61  f_1D(i) *= inv_n_planes;
62  });
63  Kokkos::fence();
64 #endif
65 }
66 
67 template<class T>
68 View<double*,CLayout, typename T::device_type> split_toroidal_average(const DomainDecomposition<DeviceType>& pol_decomp, T& f){
69  // Cast to 1D with an unmanaged view for a generic element-wise operation
70  View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((double*)(f.data()), f.size());
71 
72  // Initialize temporary memory to hold the toroidal average
73  View<double*,CLayout, typename T::device_type> toroidal_avg(NoInit("toroidal_avg"), f_1D.layout());
74  Kokkos::deep_copy(toroidal_avg, f_1D);
75 
76  // Calculate the average
77  toroidal_average_in_place(pol_decomp, toroidal_avg);
78 
79  // Remove the average from f
80  Kokkos::parallel_for("toroidal_average", Kokkos::RangePolicy<typename T::execution_space>(0, f.size()), KOKKOS_LAMBDA(const int i){
81  f_1D(i) -= toroidal_avg(i);
82  });
83  Kokkos::fence();
84 
85  return toroidal_avg;
86 }
87 
88 template<class T>
90  auto toroidal_avg = split_toroidal_average(pol_decomp, f);
91 }
92 
93 #endif
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
void toroidal_average_in_place(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: toroidal_average.hpp:34
Kokkos::Device< HostExSpace, HostMemSpace > HostType
Definition: space_settings.hpp:57
subroutine remove_toroidal_average(n, x)
Removes the toroidal average of a quantity of type real(8) that is global on a poloidal plane but loc...
Definition: tiny_functions.F90:72
void toroidal_sum_in_place(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: toroidal_average.hpp:9
View< T *, CLayout, Device > my_mirror_scratch_view(const View< T *, CLayout, Device, Kokkos::MemoryTraits< Kokkos::Unmanaged >> &view, Device nd)
Definition: my_mirror_view.hpp:97
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
View< double *, CLayout, typename T::device_type > split_toroidal_average(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: toroidal_average.hpp:68
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69