#ifndef PLANE_FIELD_GATHERER_HPP
#define PLANE_FIELD_GATHERER_HPP

// ... (includes elided in this excerpt) ...

class PlaneFieldGatherer{
  const DomainDecomposition<DeviceType> pol_decomp;
  const Grid<DeviceType> grid;

  bool gather_subset;
  bool gather_near_field;
  int near_field_pid;
  int nplanes;
  int nnode;

  int tmp_nphi;
  View<Field<VarType::Scalar, PIT_GLOBAL>**, CLayout, HostType> tmp_s;
  View<Field<VarType::Vector, PIT_GLOBAL>**, CLayout, HostType> tmp_v;
  View<Field<VarType::Scalar, PIT_GLOBAL>*, CLayout, HostType> tmp_s_full;
  View<Field<VarType::Vector, PIT_GLOBAL>*, CLayout, HostType> tmp_v_full;

  // Number of planes held by the host staging buffers
  int choose_tmp_nphi(){
#if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
    // ... (body elided in this excerpt) ...
#endif
  }
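  // PlaneFieldGatherer collects field data that is distributed one plane per rank of
  // the toroidal (intpl) communicator into contiguous (nplanes, nnode) views on the
  // device, so that downstream field-following interpolation can index any plane
  // locally.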
  // Gathers each field partition onto the rank that owns it
  template<typename T, typename FT>
  void allgather_to_local_ranks(View<FT*, CLayout, HostType>& tmp_full, T* destination){
    // Number of doubles per FT object (assumes FT is a plain aggregate of 8-byte doubles)
    int one_obj_in_dbl = sizeof(FT)/8;

    View<int*, HostType> recvcounts(NoInit("recvcounts"), pol_decomp.mpi.n_intpl_ranks);
    View<int*, HostType> displs(NoInit("displs"), pol_decomp.mpi.n_intpl_ranks);

    std::vector<MPI_Request> rrequests(pol_decomp.mpi.n_intpl_ranks);

    for(int i_root=0; i_root<pol_decomp.mpi.n_intpl_ranks; i_root++){
      // ... (root_local_first_plane, root_local_nplanes, root_local_first_node and
      //      root_local_nnode are looked up from pol_decomp.field_decomp; elided) ...

      int size = root_local_nnode * one_obj_in_dbl;

      Kokkos::deep_copy(recvcounts, 0);
      Kokkos::deep_copy(displs, 0);

      // Only the planes assigned to this root contribute; all other counts stay zero
      for(int i=0; i<root_local_nplanes; i++){
        int contributor = (i+root_local_first_plane)%grid.nplanes;
        recvcounts(contributor) = size;
        displs(contributor) = i*size;
      }

      int my_size = recvcounts(pol_decomp.mpi.my_intpl_rank);

      MPI_Igatherv(tmp_full.data()+root_local_first_node, my_size, MPI_DOUBLE, destination,
                   recvcounts.data(), displs.data(), MPI_DOUBLE, i_root,
                   pol_decomp.mpi.intpl_comm, &(rrequests[i_root]));
    }

    for(int i_root=0; i_root<pol_decomp.mpi.n_intpl_ranks; i_root++)
      MPI_Wait(&(rrequests[i_root]), MPI_STATUS_IGNORE);
  }
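  // A note on the pattern above: instead of one blocking MPI_Allgatherv, every rank of
  // the plane communicator acts as the root of its own MPI_Igatherv and receives only
  // the (plane, node-range) block that its field partition owns. The requests are
  // issued back-to-back so the per-root collectives overlap until the final waits.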
  // Copies the first gyroradius slice of a (nnode, nrho) host view into tmp_full
  template<typename FT>
  void copy_to_tmp_full(View<FT*, CLayout, HostType>& tmp_full, const View<FT**, CLayout, HostType>& rho_ff_h){
    constexpr int ZERO_GYRO = 0;

    if(rho_ff_h.extent(0) != grid.nnode || rho_ff_h.extent(1)<1)
      exit_XGC("\nError in gather_phi_ff_on_device: expected host view of size (nnode,nrho)\n");

    // (dispatch mirrors the one-dimensional overload below)
    Kokkos::parallel_for("gather_field_info", Kokkos::RangePolicy<HostExSpace>(0, grid.nnode), [=] (const int inode){
      tmp_full(inode)=rho_ff_h(inode,ZERO_GYRO);
    });
  }
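  // Only the rho=0 slice is needed by the gather: ZERO_GYRO selects the first
  // gyroradius index of the (nnode, nrho) input, consistent with the extent check above.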
  // Copies a flat (nnode) host view into tmp_full
  template<typename FT>
  void copy_to_tmp_full(View<FT*, CLayout, HostType>& tmp_full, const View<FT*, CLayout, HostType>& ff_h){
    if(ff_h.extent(0) != grid.nnode)
      exit_XGC("\nError in gather_phi_ff_on_device: expected host view of size (nnode)\n");

    Kokkos::parallel_for("gather_field_info", Kokkos::RangePolicy<HostExSpace>(0, grid.nnode), [=] (const int inode){
      tmp_full(inode)=ff_h(inode);
    });
  }
  template<typename GT0, typename GT, typename GT2>
  void calculate_phi_ff_on_device(const Simulation<DeviceType>& sml, const Grid<DeviceType>& grid,
                                  const MagneticField<DeviceType>& magnetic_field, Smoothing& smoothing,
                                  GetPotentialGradTemp<DeviceType, DeviceType>& tmp,
                                  const View<Field<VarType::Scalar,PhiInterpType::None>*, CLayout, HostType>& dpot_h,
                                  const GT0& pot0_h, GT& phi_ff, GT2& pot_phi_ff,
                                  bool potential_is_requested=true, bool use_field00=false,
                                  bool ignore_poloidal_dpot=false){
    // Two ghost planes precede and one ghost plane follows the physical planes
    int n_initial_ghost_planes = 2;
    int n_final_ghost_planes = 1;
    int n_ghost_planes = n_initial_ghost_planes + n_final_ghost_planes;

    // ... (elided) ...

    phi_ff = GT("gfpack_view", nplanes, nnode);

    // Stage the gathered potential in the tail of the phi_ff allocation instead of a
    // separate device buffer
    auto* ptr_to_end_of_phi_ff = phi_ff.data()+phi_ff.size();
    int size_of_dpot_phi = (nplanes+n_ghost_planes)*nnode;
    double* dpot_phi_addr = (double*)(ptr_to_end_of_phi_ff) - size_of_dpot_phi;
    View<double**, CLayout, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>> dpot_phi(dpot_phi_addr, nplanes+n_ghost_planes, nnode);
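    // The unmanaged view is a deliberate buffer-reuse trick: no second device
    // allocation is made for the scratch potential. It appears to rely on each FT
    // element of phi_ff spanning several doubles, so the (nplanes+n_ghost_planes)*nnode
    // doubles of dpot_phi fit inside the phi_ff allocation, and on the tail region not
    // being overwritten until the staged potential has been consumed.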
    // ... (staging of dpot_h into host buffers pot_tmp and dpot_phi_tmp; elided) ...

    // Gather every plane's potential, leaving room for the initial ghost planes
    auto destination = dpot_phi_tmp.data() + nnode*n_initial_ghost_planes;
    MPI_Allgather(pot_tmp.data(), nnode, MPI_DOUBLE, destination, nnode, MPI_DOUBLE, pol_decomp.mpi.intpl_comm);

    // ... (elided) ...

    // Fill the ghost planes by toroidal wrap-around. nplanes is copied to a local so
    // the lambda does not capture `this`; the parallel dispatch below is a
    // reconstruction, since the original loop construct is elided in this excerpt.
    int nplanes_l = nplanes;
    Kokkos::parallel_for("ghost_planes", Kokkos::RangePolicy<>(0, nnode), KOKKOS_LAMBDA(const int i){
      dpot_phi(0,i) = dpot_phi(nplanes_l-2 + n_initial_ghost_planes, i);
      dpot_phi(1,i) = dpot_phi(nplanes_l-1 + n_initial_ghost_planes, i);
      dpot_phi(nplanes_l + n_initial_ghost_planes,i) = dpot_phi(0 + n_initial_ghost_planes, i);
    });
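    // Resulting layout: rows [n_initial_ghost_planes .. nplanes-1+n_initial_ghost_planes]
    // hold the physical planes; rows 0 and 1 repeat the last two physical planes and
    // the final row repeats physical plane 0, so field-following stencils can cross the
    // toroidal seam without modular indexing.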
    if(potential_is_requested) pot_phi_ff = GT2("gfpack_potview", nplanes, nnode);

    // ... (setup of gpg_field_args, which takes (dpot_phi, ignore_poloidal_dpot) among
    //      its inputs, is elided in this excerpt) ...

    if(potential_is_requested) gpg_field_args.request_potential(pot_phi_ff);
    gpg_field_args.request_gradient(phi_ff);

    // ... (elided) ...

    get_field_grad(sml, grid, pol_decomp, magnetic_field, smoothing, gpg_field_args, tmp);
  }
  template<typename T_h, typename FT>
  void gather_phi_ff_on_device(View<FT**, CLayout, HostType>& tmp, View<FT*, CLayout, HostType>& tmp_full,
                               const T_h& rho_ff_h, View<FT**, CLayout, DeviceType>& phi_ff){
#if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
    constexpr bool gpu_without_gpu_aware_MPI = true;
#else
    constexpr bool gpu_without_gpu_aware_MPI = false;
#endif
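    // A constexpr bool is used rather than branching on the preprocessor condition
    // directly, so both transfer paths below remain visible to the compiler and the
    // untaken branch can be discarded at optimization time.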
    copy_to_tmp_full(tmp_full, rho_ff_h);

    // ... (elided) ...

    phi_ff = View<FT**, CLayout, DeviceType>("gfpack_view", nplanes, nnode);

    // Without GPU-aware MPI, gather into the host staging buffer; otherwise gather
    // straight into device memory
    auto destination = (gpu_without_gpu_aware_MPI ? tmp.data() : phi_ff.data());

    // The two collectives below are alternatives; the selecting branch (presumably on
    // gather_subset) is elided in this excerpt
    allgather_to_local_ranks(tmp_full, destination);

    int one_obj_in_dbl = sizeof(FT)/8;
    int sz = nnode * one_obj_in_dbl;
    MPI_Allgather(tmp_full.data(), sz, MPI_DOUBLE, destination, sz, MPI_DOUBLE, pol_decomp.mpi.intpl_comm);
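    // FT objects travel as raw doubles: each of the nnode entries contributes
    // one_obj_in_dbl doubles, so a single MPI_Allgather fills the (nplanes, nnode)
    // destination without a derived MPI datatype.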
    // ... (alternative path replicating tmp_full onto every plane; the selecting
    //      branch is elided in this excerpt) ...
    if(gpu_without_gpu_aware_MPI)
      for(int i=0; i<grid.nplanes; i++) Kokkos::deep_copy(my_subview(tmp, i), tmp_full);
    else
      for(int i=0; i<grid.nplanes; i++) Kokkos::deep_copy(my_subview(phi_ff, i), tmp_full);

    // ... (elided) ...

    // Staged through the host: push all gathered planes to the device in one copy
    if(gpu_without_gpu_aware_MPI){
      Kokkos::deep_copy(phi_ff, tmp);
    }
  }
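  // Summary of the two transfer paths: with GPU-aware MPI the collectives write
  // directly into device memory (phi_ff); without it, data lands in the host view tmp
  // and a single Kokkos::deep_copy moves all planes to the device at once, which is
  // generally cheaper than copying plane by plane.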
  // Selects the staging buffers matching the field type FT (presumably specialized to
  // return tmp_s/tmp_s_full for scalar fields and tmp_v/tmp_v_full for vector fields)
  template<typename FT>
  inline View<FT**, CLayout, HostType>& which_tmp();

  template<typename FT>
  inline View<FT*, CLayout, HostType>& which_tmp_full();
 public:

  PlaneFieldGatherer(const DomainDecomposition<DeviceType>& pol_decomp, const Grid<DeviceType>& grid, bool near_field=false)
    : pol_decomp(pol_decomp),
      grid(grid),
      gather_subset(pol_decomp.decompose_fields),
      gather_near_field(near_field),
      near_field_pid((gather_subset && near_field) ? pol_decomp.field_decomp.find_domain_owner(pol_decomp.plane_index, grid.nplanes, pol_decomp.node_offset, grid.nnode) : 0),
      nplanes(gather_subset ? (near_field ? pol_decomp.field_decomp.all_n_planes(near_field_pid, grid.nplanes) : pol_decomp.field_decomp.n_planes) : grid.nplanes),
      nnode(gather_subset ? (near_field ? pol_decomp.field_decomp.all_n_nodes(near_field_pid) : pol_decomp.field_decomp.n_nodes) : grid.nnode),
      tmp_nphi(choose_tmp_nphi()),
      tmp_s("tmp_s", tmp_nphi, nnode),
      tmp_v("tmp_v", tmp_nphi, nnode),
      tmp_s_full("tmp_s_full", grid.nnode),
      tmp_v_full("tmp_v_full", grid.nnode)
  {}
  // Public entry point: routes to the gather above with the staging buffers matching FT
  template<typename T_h, typename FT>
  void gather_phi_ff_on_device(const T_h& rho_ff_h, View<FT**, CLayout, DeviceType>& phi_ff){
    gather_phi_ff_on_device(which_tmp<FT>(), which_tmp_full<FT>(), rho_ff_h, phi_ff);
  }
};

#endif