1 #ifndef POLOIDAL_SUM_HPP
2 #define POLOIDAL_SUM_HPP
8 if(pol_decomp.mpi.n_plane_ranks==1)
return;
11 View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((
double*)(f.data()), f.size());
21 MPI_Allreduce(MPI_IN_PLACE, f_mpi.data(), f_mpi.size(), MPI_DOUBLE, MPI_SUM, pol_decomp.mpi.plane_comm);
32 if(pol_decomp.mpi.n_plane_ranks==1)
return;
35 View<int*,CLayout,HostType> cnts(
NoInit(
"cnts"), pol_decomp.mpi.n_plane_ranks);
36 View<int*,CLayout,HostType> displs(
NoInit(
"displs"), pol_decomp.mpi.n_plane_ranks);
39 int nv = full_view.extent(0)*full_view.extent(2);
40 for(
int i = 0; i<cnts.size(); i++){
46 int my_node_send_count = pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank+1) - pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank);
47 int my_send_count = my_node_send_count*nv;
48 int my_node_offset = pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank) - 1;
54 View<double***, CLayout, DeviceType> tmp(
NoInit(
"tmp"), my_node_send_count, full_view.extent(0),full_view.extent(2));
58 Kokkos::parallel_for(
"transpose", Kokkos::RangePolicy<ExSpace>( 0, tmp.extent(0)), KOKKOS_LAMBDA(
const int inode){
59 for(
int imu = 0; imu < tmp.extent(1); imu++){
60 for(
int ivp = 0; ivp < tmp.extent(2); ivp++){
61 tmp(inode, imu, ivp) = full_view(imu, inode+my_node_offset, ivp);
72 View<double*,CLayout, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((
double*)(tmp.data()), tmp.size());
78 View<double***, CLayout, DeviceType> full_tmp(
NoInit(
"full_tmp"), full_view.extent(1), full_view.extent(0), full_view.extent(2));
79 View<double*,CLayout, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D_out((
double*)(full_tmp.data()), full_tmp.size());
81 auto f_mpi_out =
my_mirror_view(f_1D_out, MPIHostType(), f_mpi_d_out);
86 MPI_Allgatherv(f_mpi.data(), my_send_count, MPI_DOUBLE, f_mpi_out.data(), cnts.data(), displs.data(), MPI_DOUBLE, pol_decomp.mpi.plane_comm);
96 Kokkos::parallel_for(
"transpose_back", Kokkos::RangePolicy<ExSpace>( 0, full_tmp.extent(0)), KOKKOS_LAMBDA(
const int inode){
97 for(
int imu = 0; imu < full_tmp.extent(1); imu++){
98 for(
int ivp = 0; ivp < full_tmp.extent(2); ivp++){
99 full_view(imu, inode, ivp) = full_tmp(inode, imu, ivp);
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
Kokkos::Device< HostExSpace, HostMemSpace > HostType
Definition: space_settings.hpp:57
View< T *, CLayout, Device > my_mirror_scratch_view(const View< T *, CLayout, Device, Kokkos::MemoryTraits< Kokkos::Unmanaged >> &view, Device nd)
Definition: my_mirror_view.hpp:97
void poloidal_gather(const DomainDecomposition< DeviceType > &pol_decomp, const View< double ***, CLayout, DeviceType > &full_view)
Definition: poloidal_sum.hpp:30
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void poloidal_sum_in_place(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: poloidal_sum.hpp:6
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10
Kokkos::View< int *, Kokkos::LayoutRight, HostType > gvid0_pid_h
Which processors get which vertices (host)
Definition: domain_decomposition.hpp:63