1 #ifndef POLOIDAL_SUM_HPP
2 #define POLOIDAL_SUM_HPP
12 if(pol_decomp.mpi.n_plane_ranks==1)
return;
15 View<double*,CLayout, typename T::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((
double*)(f.data()), f.size());
25 MPI_Allreduce(MPI_IN_PLACE, f_mpi.data(), f_mpi.size(), MPI_DOUBLE, MPI_SUM, pol_decomp.mpi.plane_comm);
34 template<
class Device>
37 if(pol_decomp.mpi.n_plane_ranks==1)
return;
42 if(pol_decomp.mpi.my_plane_rank==ROOT_RANK)
mirror_copy(f_mpi, full_view);
47 MPI_Bcast(f_mpi.data(),f_mpi.size(),MPI_DOUBLE,ROOT_RANK,pol_decomp.mpi.plane_comm);
57 template<
class Device>
60 if(pol_decomp.mpi.n_plane_ranks==1)
return;
73 View<double*,CLayout, MPIDeviceType> f_mpi_in(
NoInit(
"f_mpi_in"), mpi_plan.my_count());
74 View<double*,CLayout, MPIDeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_mpi_unm(f_mpi.data() + mpi_plan.my_displ(), mpi_plan.my_count());
75 Kokkos::deep_copy(f_mpi_in, f_mpi_unm);
82 MPI_Gatherv(f_mpi_in.data(), mpi_plan.my_count(), MPI_DOUBLE, f_mpi.data(), mpi_plan.cnts.data(), mpi_plan.displs.data(), MPI_DOUBLE, 0, pol_decomp.mpi.plane_comm);
83 MPI_Bcast(f_mpi.data(),f_mpi.size(),MPI_DOUBLE,0,pol_decomp.mpi.plane_comm);
93 template<
class Device>
96 if(pol_decomp.mpi.n_plane_ranks==1)
return;
99 View<int*,CLayout,HostType> cnts(
NoInit(
"cnts"), pol_decomp.mpi.n_plane_ranks);
100 View<int*,CLayout,HostType> displs(
NoInit(
"displs"), pol_decomp.mpi.n_plane_ranks);
103 int nv = full_view.extent(0)*full_view.extent(2);
104 for(
int i = 0; i<cnts.size(); i++){
110 int my_node_send_count = pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank+1) - pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank);
111 int my_send_count = my_node_send_count*nv;
112 int my_node_offset = pol_decomp.
gvid0_pid_h(pol_decomp.mpi.my_plane_rank) - 1;
118 View<double***, CLayout, Device> tmp(
NoInit(
"tmp"), my_node_send_count, full_view.extent(0),full_view.extent(2));
122 Kokkos::parallel_for(
"transpose", Kokkos::RangePolicy<typename Device::execution_space>( 0, tmp.extent(0)), KOKKOS_LAMBDA(
const int inode){
123 for(
int imu = 0; imu < tmp.extent(1); imu++){
124 for(
int ivp = 0; ivp < tmp.extent(2); ivp++){
125 tmp(inode, imu, ivp) = full_view(imu, inode+my_node_offset, ivp);
136 View<double*,CLayout, Device, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D((
double*)(tmp.data()), tmp.size());
142 View<double***, CLayout, Device> full_tmp(
NoInit(
"full_tmp"), full_view.extent(1), full_view.extent(0), full_view.extent(2));
143 View<double*,CLayout, Device, Kokkos::MemoryTraits<Kokkos::Unmanaged>> f_1D_out((
double*)(full_tmp.data()), full_tmp.size());
150 MPI_Allgatherv(f_mpi.data(), my_send_count, MPI_DOUBLE, f_mpi_out.data(), cnts.data(), displs.data(), MPI_DOUBLE, pol_decomp.mpi.plane_comm);
160 Kokkos::parallel_for(
"transpose_back", Kokkos::RangePolicy<ExSpace>( 0, full_tmp.extent(0)), KOKKOS_LAMBDA(
const int inode){
161 for(
int imu = 0; imu < full_tmp.extent(1); imu++){
162 for(
int ivp = 0; ivp < full_tmp.extent(2); ivp++){
163 full_view(imu, inode, ivp) = full_tmp(inode, imu, ivp);
void poloidal_gather(const DomainDecomposition< DeviceType > &pol_decomp, const View< double *, CLayout, Device > &full_view)
Definition: poloidal_sum.hpp:58
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
Kokkos::Device< HostExSpace, HostMemSpace > HostType
Definition: space_settings.hpp:57
View< T *, CLayout, Device > my_mirror_scratch_view(const View< T *, CLayout, Device, Kokkos::MemoryTraits< Kokkos::Unmanaged >> &view, Device nd)
Definition: my_mirror_view.hpp:97
void poloidal_bcast(const DomainDecomposition< DeviceType > &pol_decomp, const View< double *, CLayout, Device > &full_view, int ROOT_RANK)
Definition: poloidal_sum.hpp:35
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void poloidal_sum_in_place(const DomainDecomposition< DeviceType > &pol_decomp, T &f)
Definition: poloidal_sum.hpp:10
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
HostType MPIDeviceType
Definition: space_settings.hpp:63
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
DistributionPlan mpi_distribution_plan(int nv) const
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10
Kokkos::View< int *, Kokkos::LayoutRight, HostType > gvid0_pid_h
Which processors get which vertices (host)
Definition: domain_decomposition.hpp:92