#ifndef PLANE_FIELD_GATHERER_HPP
#define PLANE_FIELD_GATHERER_HPP
    // ... (lines elided)
#if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
    // ... (elided)
    template<typename T, typename FT>
    void allgather_to_local_ranks(View<FT*, CLayout, HostType>& tmp_full, T* destination){
        // Number of MPI_DOUBLEs per FT object (FT is assumed to pack into doubles)
        int one_obj_in_dbl = sizeof(FT)/8;
 
        View<int*, HostType> recvcounts(NoInit("recvcounts"), pol_decomp.mpi.n_intpl_ranks);
        View<int*, HostType> displs(NoInit("displs"), pol_decomp.mpi.n_intpl_ranks); // declaration elided in the listing; reconstructed from its uses below
        std::vector<MPI_Request> rrequests(pol_decomp.mpi.n_intpl_ranks);
 
        for(int i_root=0; i_root<pol_decomp.mpi.n_intpl_ranks; i_root++){
            // ... (per-root geometry elided: root_local_nnode, root_local_nplanes,
            //      root_local_first_plane, root_local_first_node)
            int size = root_local_nnode * one_obj_in_dbl;
 
            // Reset the gather layout for this root
            Kokkos::deep_copy(recvcounts, 0);
            Kokkos::deep_copy(displs, 0);

            // Each of the root's local planes comes from one contributor rank,
            // mapped cyclically around the torus
            for(int i=0; i<root_local_nplanes; i++){
                int contributor = (i+root_local_first_plane)%grid.nplanes;
                recvcounts(contributor) = size;
                displs(contributor) = i*size;
            }

            int my_size = recvcounts(pol_decomp.mpi.my_intpl_rank);
 
            MPI_Igatherv(tmp_full.data()+root_local_first_node, my_size, MPI_DOUBLE,
                         destination, recvcounts.data(), displs.data(), MPI_DOUBLE,
                         i_root, pol_decomp.mpi.intpl_comm, &(rrequests[i_root]));
        }

        // Complete all outstanding gathers
        for(int i_root=0; i_root<pol_decomp.mpi.n_intpl_ranks; i_root++)
            MPI_Wait(&(rrequests[i_root]), MPI_STATUS_IGNORE);
    }
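    // --- Illustration (not part of the original source): the gather pattern
    // above in miniature. Every rank posts one nonblocking MPI_Igatherv per
    // root so all gathers proceed concurrently; ranks contributing nothing to
    // a given root simply carry recvcounts[rank]==0. Names n, my_data,
    // my_count, recvbuf, counts, displs, and comm are assumptions for this
    // sketch:
    //
    //   std::vector<MPI_Request> reqs(n);
    //   for(int root=0; root<n; root++){
    //       // recvbuf/counts/displs are significant only on 'root'
    //       MPI_Igatherv(my_data, my_count, MPI_DOUBLE,
    //                    recvbuf, counts, displs, MPI_DOUBLE,
    //                    root, comm, &reqs[root]);
    //   }
    //   MPI_Waitall(n, reqs.data(), MPI_STATUSES_IGNORE); // equivalent to the MPI_Wait loop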
 
    template<typename FT>
    void copy_to_tmp_full(View<FT*, CLayout,HostType>& tmp_full, const View<FT**, CLayout,HostType>& rho_ff_h){
        // Use only the zeroth gyroaverage component
        constexpr int ZERO_GYRO = 0;

        if(rho_ff_h.extent(0) != grid.nnode || rho_ff_h.extent(1) < 1)
            exit_XGC("\nError in gather_phi_ff_on_device: expected host view of size (nnode,nrho)\n");

        // ... (loop header elided)
            tmp_full(inode) = rho_ff_h(inode, ZERO_GYRO);
    }
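    // --- Illustration (an assumption; the elided lines above hold the actual
    // loop): a host-side copy of this shape is conventionally written as
    //
    //   for(int inode=0; inode<grid.nnode; inode++)
    //       tmp_full(inode) = rho_ff_h(inode, ZERO_GYRO);
    //
    // stripping the gyroaverage dimension so one flat plane can be gathered.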
 
    template<typename FT>
    void copy_to_tmp_full(View<FT*, CLayout,HostType>& tmp_full, const View<FT*, CLayout,HostType>& ff_h){
        if(ff_h.extent(0) != grid.nnode)
            exit_XGC("\nError in gather_phi_ff_on_device: expected host view of size (nnode)\n");

        // ... (loop header elided)
            tmp_full(inode) = ff_h(inode);
    }
 
    template<typename GT0, typename GT, typename GT2>
    void calculate_phi_ff_on_device(const Simulation<DeviceType>& sml, const Grid<DeviceType>& grid,
                                    GetPotentialGradTemp<DeviceType, DeviceType>& tmp,
                                    const View<Field<VarType::Scalar,PhiInterpType::None>*,CLayout,HostType>& dpot_h,
                                    const GT0& pot0_h, GT& phi_ff, GT2& pot_phi_ff,
                                    bool potential_is_requested=true, bool use_field00=false,
                                    bool ignore_poloidal_dpot=false){
 
        // Two ghost planes precede and one follows the gathered planes
        int n_initial_ghost_planes = 2;
        int n_final_ghost_planes = 1;
        int n_ghost_planes = n_initial_ghost_planes + n_final_ghost_planes;
 
        // ... (sizes elided)
        // Alias the tail of phi_ff's allocation as scratch for dpot_phi rather
        // than allocating a separate buffer
        auto* ptr_to_end_of_phi_ff = phi_ff.data() + phi_ff.size();
        double* dpot_phi_addr = (double*)(ptr_to_end_of_phi_ff) - size_of_dpot_phi;
        View<double**,CLayout,DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
            dpot_phi(dpot_phi_addr, nplanes+n_ghost_planes, nnode);
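        // --- Illustration (not part of the original source): an unmanaged
        // Kokkos view wraps existing memory instead of allocating and does no
        // reference counting, so the backing allocation (here phi_ff's) must
        // outlive the alias. Minimal sketch with hypothetical extents n0, n1:
        //
        //   double* buf = /* existing allocation of >= n0*n1 doubles */;
        //   View<double**, CLayout, DeviceType,
        //        Kokkos::MemoryTraits<Kokkos::Unmanaged>> alias(buf, n0, n1);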
 
        // ... (staging buffers elided)
        // Gather one plane per rank, depositing past the leading ghost planes
        auto destination = dpot_phi_tmp.data() + nnode*n_initial_ghost_planes;
        MPI_Allgather(pot_tmp.data(), nnode, MPI_DOUBLE, destination, nnode, MPI_DOUBLE,
                      pol_decomp.mpi.intpl_comm);
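        // --- Note (not part of the original source): MPI_Allgather deposits
        // rank r's nnode doubles at destination + r*nnode, so the buffer ends
        // up laid out as [ghost, ghost, plane_0, ..., plane_{nplanes-1}, ghost],
        // with the real planes starting n_initial_ghost_planes*nnode doubles in.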
 
        // ... (loop over nodes elided)
            dpot_phi(0,i) = dpot_phi(nplanes_l-2 + n_initial_ghost_planes, i);
            dpot_phi(1,i) = dpot_phi(nplanes_l-1 + n_initial_ghost_planes, i);
            dpot_phi(nplanes_l + n_initial_ghost_planes,i) = dpot_phi(0 + n_initial_ghost_planes, i);
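            // --- Note (not part of the original source): the three copies above
            // impose toroidal periodicity on the ghost planes. With real planes
            // shifted by n_initial_ghost_planes==2, e.g. nplanes_l==4 gives
            //   ghost[0] = plane 2, ghost[1] = plane 3   (the last two planes)
            //   trailing ghost   = plane 0               (wrap past the end)
            // so plane interpolation can cross the periodic boundary.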
 
        if(potential_is_requested) pot_phi_ff = GT2("gfpack_potview", nplanes, nnode);

        // ... (gpg_field_args construction elided; its initializer ends with)
                                            (dpot_phi, ignore_poloidal_dpot);
 
        if(potential_is_requested) gpg_field_args.request_potential(pot_phi_ff);
        gpg_field_args.request_gradient(phi_ff);
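        // --- Note (an assumption from the surrounding declarations): the two
        // requests above are fulfilled on the elided lines by a single call,
        // presumably of the form
        //
        //   get_field_grad(grid, gpg_field_args, tmp);
        //
        // which computes the gradient into phi_ff and, when requested, the
        // potential into pot_phi_ff.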
 
    template<typename T_h, typename FT>
    void gather_phi_ff_on_device(View<FT**, CLayout,HostType>& tmp, View<FT*, CLayout,HostType>& tmp_full,
                                 const T_h& rho_ff_h, View<FT**, CLayout,DeviceType>& phi_ff){
 
#if defined(USE_GPU) && !defined(USE_GPU_AWARE_MPI)
        // MPI cannot read device buffers here, so stage through host memory
        constexpr bool gpu_without_gpu_aware_MPI = true;
#else
        constexpr bool gpu_without_gpu_aware_MPI = false;
#endif
 
        // Allocate the device-resident result view
        phi_ff = View<FT**, CLayout,DeviceType>("gfpack_view", nplanes, nnode);

        // With GPU-aware MPI, gather straight into device memory; otherwise
        // gather into the host buffer and copy to the device afterwards
        auto destination = (gpu_without_gpu_aware_MPI ? tmp.data() : phi_ff.data());
 
            int one_obj_in_dbl = sizeof(FT)/8;
            int sz = nnode * one_obj_in_dbl;
            MPI_Allgather(tmp_full.data(), sz, MPI_DOUBLE, destination, sz, MPI_DOUBLE,
                          pol_decomp.mpi.intpl_comm);
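            // --- Illustration (not part of the original source): shipping
            // structs as flat MPI_DOUBLE streams avoids a derived MPI datatype.
            // With a hypothetical FT:
            //
            //   struct Vec3 { double x, y, z; };
            //   static_assert(sizeof(Vec3) % sizeof(double) == 0, "FT must pack into doubles");
            //   // one_obj_in_dbl == 3, so each rank contributes nnode*3 doubles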
 
            // ... (elided)
            if(gpu_without_gpu_aware_MPI)
                // ... (body elided)

        // ... (elided)
        if(gpu_without_gpu_aware_MPI){
            // Single bulk host-to-device copy of the gathered planes
            Kokkos::deep_copy(phi_ff, tmp);
        }
    }
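    // --- Note (not part of the original source): the #if branch above uses the
    // standard staging pattern for non-GPU-aware MPI: MPI_Allgather lands in the
    // host view 'tmp', and one bulk Kokkos::deep_copy then moves the result to
    // the device, which typically beats many small interleaved transfers.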
 
    // Helper templates (bodies elided); signatures per their declarations:
    template<typename FT>
    View<FT*, CLayout, HostType>& which_tmp_full();

    template<typename FT>
    View<FT**, CLayout, HostType>& which_tmp();

    // ... (constructor and other members elided)
    template<typename T_h, typename FT>
    void gather_phi_ff_on_device(const T_h& rho_ff_h, View<FT**, CLayout,DeviceType>& phi_ff){
        // ... (body elided)
    }
 