1 #ifndef PLANE_FIELD_GATHERER_HPP
2 #define PLANE_FIELD_GATHERER_HPP
29 #if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
// Fragment of a templated gather routine (its signature line is not visible in
// this excerpt). For every rank of pol_decomp.mpi.intpl_comm acting as root, it
// posts one non-blocking MPI_Igatherv and afterwards waits on all requests.
38 template<
typename T,
typename FT>
// Number of 8-byte units per FT object, used to express counts in MPI_DOUBLE.
// Integer division: a sizeof(FT) that is not a multiple of 8 would be truncated.
41 int one_obj_in_dbl =
sizeof(FT)/8;
// Per-rank receive counts on the host, allocated without initialization; they are
// zeroed at the start of every root iteration below.
43 View<int*, HostType> recvcounts(
NoInit(
"recvcounts"),
pol_decomp.mpi.n_intpl_ranks);
// One request handle per root, i.e. one per posted MPI_Igatherv.
45 std::vector<MPI_Request> rrequests(
pol_decomp.mpi.n_intpl_ranks);
// Each rank of the communicator takes a turn as the gather root.
48 for(
int i_root=0; i_root<
pol_decomp.mpi.n_intpl_ranks; i_root++){
// Count (in doubles) contributed by one plane for this root; root_local_nnode is
// computed on lines not visible here.
62 int size = root_local_nnode * one_obj_in_dbl;
// Reset so that ranks that are not contributors send/receive zero elements.
// (displs is declared on a line not visible in this excerpt.)
// NOTE(review): recvcounts/displs are rewritten on every iteration while the
// MPI_Igatherv requests from earlier iterations are still pending (they are only
// waited on after the loop). MPI forbids modifying count/displacement arrays of a
// non-blocking collective before it completes - confirm whether per-root arrays
// are needed, unless the omitted lines already guard against this.
66 Kokkos::deep_copy(recvcounts, 0);
67 Kokkos::deep_copy(displs, 0);
// Mark the contributing ranks: plane i of the root's range comes from rank
// (i + root_local_first_plane) modulo grid.nplanes and lands at offset i*size.
70 for(
int i=0; i<root_local_nplanes; i++){
71 int contributor = (i+root_local_first_plane)%
grid.
nplanes;
72 recvcounts(contributor) = size;
73 displs(contributor) = i*size;
// What this rank sends to the current root: size if it is a contributor, else 0.
77 int my_size = recvcounts(
pol_decomp.mpi.my_intpl_rank);
// Non-blocking gather to i_root; the send buffer starts root_local_first_node
// elements (of FT) into tmp_full.
79 MPI_Igatherv(tmp_full.data()+root_local_first_node, my_size, MPI_DOUBLE, destination, recvcounts.data(), displs.data(), MPI_DOUBLE, i_root,
pol_decomp.mpi.intpl_comm, &(rrequests[i_root]));
// Complete all posted gathers, in root order.
82 for(
int i_root=0; i_root<
pol_decomp.mpi.n_intpl_ranks; i_root++) MPI_Wait(&(rrequests[i_root]), MPI_STATUS_IGNORE);
// Overload for a 2-D host view: copies column 0 of rho_ff_h into the 1-D host
// buffer tmp_full, one entry per node. (The template header and the loop that
// defines inode are not visible in this excerpt.)
89 void copy_to_tmp_full(View<FT*, CLayout,HostType>& tmp_full,
const View<FT**, CLayout,HostType>& rho_ff_h){
// Index of the second-dimension column that is copied.
90 constexpr
int ZERO_GYRO = 0;
// Abort unless the first extent equals grid.nnode and the second extent is at
// least 1 (so that column ZERO_GYRO exists).
93 if(rho_ff_h.extent(0) !=
grid.
nnode || rho_ff_h.extent(1)<1)
exit_XGC(
"\nError in gather_phi_ff_on_device: expected host view of size (nnode,nrho)\n");
// Per-node copy of the selected column.
97 tmp_full(inode)=rho_ff_h(inode,ZERO_GYRO);
// Overload for a 1-D host view: copies ff_h element by element into tmp_full.
// (The loop that defines inode is not visible in this excerpt.)
103 template<
typename FT>
104 void copy_to_tmp_full(View<FT*, CLayout,HostType>& tmp_full,
const View<FT*, CLayout,HostType>& ff_h){
// Abort unless the view has exactly grid.nnode entries.
// NOTE(review): the message below says "(nnode,nrho)" and names
// gather_phi_ff_on_device, but this overload takes a 1-D view and only checks
// extent(0) - looks copied from the 2-D overload; confirm and adjust the text.
106 if(ff_h.extent(0) !=
grid.
nnode)
exit_XGC(
"\nError in gather_phi_ff_on_device: expected host view of size (nnode,nrho)\n");
// Per-node copy.
110 tmp_full(inode)=ff_h(inode);
// Fragment of calculate_phi_ff_on_device. From the visible lines: it all-gathers
// a per-rank potential plane into a plane-major buffer padded with ghost planes,
// fills the ghost planes from the opposite end of the plane range, optionally
// allocates pot_phi_ff, and requests potential/gradient output through
// gpg_field_args. Several intermediate lines are not visible in this excerpt.
//
// Parameters (as far as the visible code uses them):
//   phi_ff                 - requested as the gradient output; the tail of its
//                            allocation is also reused as the dpot_phi buffer.
//   pot_phi_ff             - (re)allocated as (nplanes, nnode) and requested as the
//                            potential output when potential_is_requested is true.
//   potential_is_requested - defaults to true.
//   use_field00            - defaults to false; its use is not visible here.
//   ignore_poloidal_dpot   - defaults to false; forwarded together with dpot_phi.
116 template<
typename GT0,
typename GT,
typename GT2>
117 void calculate_phi_ff_on_device(
const Simulation<DeviceType>& sml,
const Grid<DeviceType>&
grid,
GetPotentialGradTemp<DeviceType, DeviceType>& tmp,
const View<
Field<VarType::Scalar,PhiInterpType::None>*,
CLayout,
HostType>& dpot_h,
const GT0& pot0_h, GT& phi_ff, GT2& pot_phi_ff,
bool potential_is_requested=
true,
bool use_field00=
false,
bool ignore_poloidal_dpot=
false){
// Ghost-plane layout of dpot_phi: two extra rows before the real planes and one
// after, three in total.
120 int n_initial_ghost_planes = 2;
121 int n_final_ghost_planes = 1;
122 int n_ghost_planes = n_initial_ghost_planes + n_final_ghost_planes;
// dpot_phi does not own memory: it is an unmanaged view over the last
// size_of_dpot_phi doubles of phi_ff's allocation (size_of_dpot_phi is computed on
// a line not visible here), so writes to dpot_phi overwrite the tail of phi_ff.
130 auto* ptr_to_end_of_phi_ff = phi_ff.data()+phi_ff.size();
132 double* dpot_phi_addr = (
double*)(ptr_to_end_of_phi_ff) - size_of_dpot_phi;
133 View<double**,CLayout,DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>> dpot_phi(dpot_phi_addr,
nplanes+n_ghost_planes,
nnode);
// Gather nnode doubles from every rank of intpl_comm; the receive pointer skips
// the initial ghost rows so rank 0's data lands in row n_initial_ghost_planes.
// (dpot_phi_tmp and pot_tmp are defined on lines not visible here.)
146 auto destination = dpot_phi_tmp.data() +
nnode*n_initial_ghost_planes;
147 MPI_Allgather(pot_tmp.data(),
nnode, MPI_DOUBLE, destination,
nnode, MPI_DOUBLE,
pol_decomp.mpi.intpl_comm);
// Ghost fill with wrap-around: rows 0 and 1 take the last two real planes, and
// the row after the last real plane takes the first real plane. (nplanes_l and
// the loop over i are defined on lines not visible here.)
161 dpot_phi(0,i) = dpot_phi(nplanes_l-2 + n_initial_ghost_planes, i);
162 dpot_phi(1,i) = dpot_phi(nplanes_l-1 + n_initial_ghost_planes, i);
163 dpot_phi(nplanes_l + n_initial_ghost_planes,i) = dpot_phi(0 + n_initial_ghost_planes, i);
// Allocate the potential output only when the caller asked for it.
172 if(potential_is_requested) pot_phi_ff = GT2(
"gfpack_potview",
nplanes,
nnode);
// Argument list of a declaration whose first line is not visible here
// (presumably the construction of gpg_field_args - TODO confirm).
175 (dpot_phi, ignore_poloidal_dpot);
// Register the requested outputs: potential is optional, gradient always.
184 if(potential_is_requested) gpg_field_args.request_potential(pot_phi_ff);
185 gpg_field_args.request_gradient(phi_ff);
// Fragment of gather_phi_ff_on_device (4-argument overload). From the visible
// lines: it allocates phi_ff as (nplanes, nnode), all-gathers tmp_full from every
// rank of intpl_comm, and - when built for GPU without GPU-aware MPI - receives
// into the host buffer tmp first and then deep-copies tmp into phi_ff.
// (rho_ff_h is not used on any line visible in this excerpt.)
196 template<
typename T_h,
typename FT>
197 void gather_phi_ff_on_device(View<FT**, CLayout,HostType>& tmp, View<FT*, CLayout,HostType>& tmp_full,
const T_h& rho_ff_h, View<FT**, CLayout,DeviceType>& phi_ff){
// Compile-time switch; the matching #else/#endif lines are not visible here, so
// the two definitions below belong to opposite preprocessor branches.
198 #if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
199 constexpr
bool gpu_without_gpu_aware_MPI =
true;
201 constexpr
bool gpu_without_gpu_aware_MPI =
false;
// Fresh device allocation for the gathered result.
208 phi_ff = View<FT**, CLayout,DeviceType>(
"gfpack_view",
nplanes,
nnode);
// Receive into the host staging buffer when MPI cannot write to device memory,
// otherwise directly into phi_ff.
211 auto destination = (gpu_without_gpu_aware_MPI ? tmp.data() : phi_ff.data());
// Per-rank element count in MPI_DOUBLE units: nnode objects times the number of
// 8-byte units per FT (integer division; assumes sizeof(FT) is a multiple of 8).
218 int one_obj_in_dbl =
sizeof(FT)/8;
219 int sz=
nnode * one_obj_in_dbl;
220 MPI_Allgather(tmp_full.data(), sz, MPI_DOUBLE, destination, sz, MPI_DOUBLE,
pol_decomp.mpi.intpl_comm);
// The statement guarded by this condition is not visible in this excerpt.
223 if(gpu_without_gpu_aware_MPI)
// Staged path: move the gathered host data to the device view.
230 if(gpu_without_gpu_aware_MPI){
231 Kokkos::deep_copy(phi_ff, tmp);
235 template<
typename FT>
238 template<
typename FT>
260 template<
typename T_h,
typename FT>
FieldDecomposition< Device > field_decomp
Definition: domain_decomposition.hpp:88
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:91
int nplanes
Number of planes.
Definition: grid.hpp:175
int nnode
Number of grid nodes.
Definition: grid.hpp:174
Definition: plane_field_gatherer.hpp:9
void allgather_to_local_ranks(View< FT *, CLayout, HostType > &tmp_full, T *destination)
Definition: plane_field_gatherer.hpp:39
int choose_tmp_nphi()
Definition: plane_field_gatherer.hpp:28
View< Field< VarType::Vector, PIT_GLOBAL > **, CLayout, HostType > tmp_v
Definition: plane_field_gatherer.hpp:21
View< Field< VarType::Scalar, PIT_GLOBAL > *, CLayout, HostType > tmp_s_full
Definition: plane_field_gatherer.hpp:23
View< FT *, CLayout, HostType > & which_tmp_full()
int tmp_nphi
Definition: plane_field_gatherer.hpp:19
void copy_to_tmp_full(View< FT *, CLayout, HostType > &tmp_full, const View< FT **, CLayout, HostType > &rho_ff_h)
Definition: plane_field_gatherer.hpp:89
const DomainDecomposition< DeviceType > pol_decomp
Definition: plane_field_gatherer.hpp:10
int nnode
Definition: plane_field_gatherer.hpp:17
const Grid< DeviceType > grid
Definition: plane_field_gatherer.hpp:11
View< Field< VarType::Vector, PIT_GLOBAL > *, CLayout, HostType > tmp_v_full
Definition: plane_field_gatherer.hpp:24
void calculate_phi_ff_on_device(const Simulation< DeviceType > &sml, const Grid< DeviceType > &grid, GetPotentialGradTemp< DeviceType, DeviceType > &tmp, const View< Field< VarType::Scalar, PhiInterpType::None > *, CLayout, HostType > &dpot_h, const GT0 &pot0_h, GT &phi_ff, GT2 &pot_phi_ff, bool potential_is_requested=true, bool use_field00=false, bool ignore_poloidal_dpot=false)
Definition: plane_field_gatherer.hpp:117
PlaneFieldGatherer(const DomainDecomposition< DeviceType > &pol_decomp, const Grid< DeviceType > &grid, bool near_field=false)
Definition: plane_field_gatherer.hpp:243
void gather_phi_ff_on_device(View< FT **, CLayout, HostType > &tmp, View< FT *, CLayout, HostType > &tmp_full, const T_h &rho_ff_h, View< FT **, CLayout, DeviceType > &phi_ff)
Definition: plane_field_gatherer.hpp:197
bool gather_subset
Definition: plane_field_gatherer.hpp:13
View< FT **, CLayout, HostType > & which_tmp()
void copy_to_tmp_full(View< FT *, CLayout, HostType > &tmp_full, const View< FT *, CLayout, HostType > &ff_h)
Definition: plane_field_gatherer.hpp:104
View< Field< VarType::Scalar, PIT_GLOBAL > **, CLayout, HostType > tmp_s
Definition: plane_field_gatherer.hpp:20
int nplanes
Definition: plane_field_gatherer.hpp:16
int near_field_pid
Definition: plane_field_gatherer.hpp:15
void gather_phi_ff_on_device(const T_h &rho_ff_h, View< FT **, CLayout, DeviceType > &phi_ff)
Definition: plane_field_gatherer.hpp:261
bool gather_near_field
Definition: plane_field_gatherer.hpp:14
bool grad_psitheta
Definition: sml.hpp:89
void get_field_grad(const Grid< DeviceType > &grid, GetPotGradFieldArgs< DeviceIn, DeviceOut, VT, PIT, TT, KT > &args, GetPotentialGradTemp< DeviceType, DeviceOut > &tmp)
Definition: get_potential_grad.cpp:389
void exit_XGC(std::string msg)
Definition: globals.hpp:37
KOKKOS_INLINE_FUNCTION unsigned positive_modulo(int value, unsigned m)
Definition: globals.hpp:231
constexpr PhiInterpType PIT_GLOBAL
Definition: globals.hpp:103
@ DriftKin
Definition: globals.hpp:89
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
View< T *, CLayout, Device > my_mirror_scratch_view(const View< T *, CLayout, Device, Kokkos::MemoryTraits< Kokkos::Unmanaged >> &view, Device nd)
Definition: my_mirror_view.hpp:97
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
Kokkos::View< T *, Kokkos::LayoutRight, Device > my_subview(const Kokkos::View< T ****, Kokkos::LayoutRight, Device > &view, int i, int j, int k)
Definition: my_subview.hpp:8
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Kokkos::Device< HostExSpace, HostMemSpace > HostType
Definition: space_settings.hpp:57
Kokkos::LayoutRight CLayout
Definition: space_settings.hpp:68
HostType MPIDeviceType
Definition: space_settings.hpp:63
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
Definition: field.hpp:321
Definition: get_potential_grad.hpp:205
Definition: get_potential_grad.hpp:44
GradientMatrices< DeviceType > grad_matrices
Definition: get_potential_grad.hpp:65
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10