1 #ifndef TRANSFER_VERTEX_DATA_HPP
2 #define TRANSFER_VERTEX_DATA_HPP
6 inline int pair(
int np,
int p,
int k){
8 return (out<np ? out : -1);
24 return array.extent(1);
50 template<
class V,
class... TRest>
57 KOKKOS_INLINE_FUNCTION
double&
vertex_access(
const T& array,
int i,
int ip);
61 KOKKOS_INLINE_FUNCTION
double&
vertex_access(
const View<double*,CLayout,HostType>& array,
int i,
int ip){
67 KOKKOS_INLINE_FUNCTION
double&
vertex_access(
const View<double**,CLayout,HostType>& array,
int i,
int ip){
82 template<
typename T,
typename... TRest>
91 array = View<double*,CLayout,HostType>(
NoInit(array.label()), nnodes);
97 array = View<double**,CLayout,HostType>(
NoInit(array.label()), nnodes, array.extent(1));
114 inline void load_arrays(
int n,
int new_offset,
int old_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const V& array) {
116 Kokkos::parallel_for(
"pol_decomp_redist_load", Kokkos::RangePolicy<typename V::execution_space>(0, n), KOKKOS_LAMBDA(
const int i){
117 int inew = i + new_offset;
118 int iold = i + old_offset;
119 for(
int ip = 0; ip<n_vals; ip++){
124 arr_offset += n_vals;
128 template<
class V,
class... TRest>
129 inline void load_arrays(
int n,
int new_offset,
int old_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const V& first,
const TRest&... args) {
130 load_arrays(n, new_offset, old_offset, buffer, arr_offset, first);
131 load_arrays(n, new_offset, old_offset, buffer, arr_offset, args...);
136 inline void load_arrays(
int n,
int new_offset,
int old_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const std::vector<View<double*,CLayout,HostType>>& array) {
138 for(
int ip=0; ip<n_vals; ip++){
139 auto view = array[ip];
140 Kokkos::parallel_for(
"pol_decomp_redist_load", Kokkos::RangePolicy<HostExSpace>(0, n), KOKKOS_LAMBDA(
const int i){
141 int inew = i + new_offset;
142 int iold = i + old_offset;
143 buffer(inew,ip+arr_offset) = view(iold);
147 arr_offset += n_vals;
151 template<
class... Vs>
152 inline void load_buffer(
int n,
int new_offset,
int old_offset,
const View<double**,CLayout,HostType>& buffer,
const Vs&... arrays){
156 load_arrays(n, new_offset, old_offset, buffer, arr_offset, arrays...);
165 inline void unload_arrays(
int n,
int vert_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const V& array) {
167 Kokkos::parallel_for(
"pol_decomp_redist_unload", Kokkos::RangePolicy<typename V::execution_space>(0, n), KOKKOS_LAMBDA(
const int i){
168 for(
int ip = 0; ip<n_vals; ip++){
169 vertex_access(array, i+vert_offset, ip) = buffer(i,ip+arr_offset);
173 arr_offset += n_vals;
177 template<
class V,
class... TRest>
178 inline void unload_arrays(
int n,
int vert_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const V& first,
const TRest&... args) {
185 inline void unload_arrays(
int n,
int vert_offset,
const View<double**,CLayout,HostType>& buffer,
int& arr_offset,
const std::vector<View<double*,CLayout,HostType>>& array) {
187 for(
int ip=0; ip<n_vals; ip++){
188 auto view = array[ip];
189 Kokkos::parallel_for(
"pol_decomp_redist_unload", Kokkos::RangePolicy<HostExSpace>(0, n), KOKKOS_LAMBDA(
const int i){
190 view(i+vert_offset) = buffer(i,ip+arr_offset);
194 arr_offset += n_vals;
201 for(
int i=0;i<plan.
cnts.size();i++){
206 constexpr
int MAXPREPOSTS = 20;
207 return (n_recv <= MAXPREPOSTS);
211 int max_buf_size = 1;
212 for(
int i=0;i<plan.
cnts.size();i++){
214 max_buf_size = std::max(max_buf_size, plan.
cnts(i));
224 template<
class Device>
226 View<double**,CLayout,Device>
view;
237 :
view(
NoInit(
"tmp_redistribute"), nnode_in, n_dbl_per_vertex),
249 template<
class... Vs>
250 void unload(
int vertex_offset,
const Vs&... arrays)
const{
251 if(
nnode()<=0)
return;
258 return view.extent(0);
264 MPI_Wait(&(
rrequests[i]), MPI_STATUS_IGNORE);
272 MPI_Wait(&(
srequests[i]), MPI_STATUS_IGNORE);
281 template<
class... Vs>
293 GPTLstop(
"TRANSFER_DATA_BUFFER_SETUP");
295 constexpr
int MSG_TAG = 5;
296 constexpr
int SIGNAL_TAG = 10;
299 GPTLstart(
"TRANSFER_DATA_PREPOST_SETUP");
301 GPTLstop(
"TRANSFER_DATA_PREPOST_SETUP");
303 GPTLstart(
"TRANSFER_DATA_PREPOST_IRECV");
304 int p=1;
while ( p < n_ranks ) p*=2;
305 if(prepost_receive_requests){
306 for(
int i=1;i<p;i++){
307 int pid =
pair(n_ranks,i,my_rank);
309 if (recv_plan.
cnts(pid) > 0){
310 double* recv_addr = &vertex_buffer.
view(recv_plan.
displs(pid),0);
311 MPI_Irecv(recv_addr, recv_plan.
cnts(pid)*n_dbl_per_vertex, MPI_DOUBLE, pid, MSG_TAG, comm, &(vertex_buffer.
rrequests[pid]));
315 MPI_Send(&rsignal, 1, MPI_INTEGER, pid, SIGNAL_TAG, comm);
321 GPTLstop(
"TRANSFER_DATA_PREPOST_IRECV");
323 GPTLstart(
"TRANSFER_DATA_LOAD_STAY_BUFFER");
326 GPTLstop(
"TRANSFER_DATA_LOAD_STAY_BUFFER");
330 GPTLstart(
"TRANSFER_DATA_ALLOC_SENDARR_SH");
331 View<double**,CLayout,HostType> sendarr_shared;
334 sendarr_shared = View<double**,CLayout,HostType>(
NoInit(
"sendarr_shared"), n_sendarr, n_dbl_per_vertex);
336 GPTLstop(
"TRANSFER_DATA_ALLOC_SENDARR_SH");
338 for(
int i=1;i<p;i++){
339 int pid =
pair(n_ranks,i,my_rank);
341 if(!prepost_receive_requests){
343 if(recv_plan.
cnts(pid) > 0){
345 double* recv_addr = &vertex_buffer.
view(recv_plan.
displs(pid),0);
346 MPI_Irecv(recv_addr, recv_plan.
cnts(pid)*n_dbl_per_vertex, MPI_DOUBLE, pid, MSG_TAG, comm, &(vertex_buffer.
rrequests[pid]));
350 MPI_Send(&rsignal, 1, MPI_INTEGER, pid, SIGNAL_TAG, comm);
356 if (send_plan.
cnts(pid) > 0){
357 View<double**,CLayout,HostType> sendarr;
359 GPTLstart(
"TRANSFER_DATA_ALLOC_SENDARR");
361 vertex_buffer.
send_buffers[pid] = View<double**,CLayout,HostType>(
NoInit(
"sendarr"), send_plan.
cnts(pid), n_dbl_per_vertex);
363 GPTLstop(
"TRANSFER_DATA_ALLOC_SENDARR");
367 sendarr = Kokkos::subview(sendarr_shared, Kokkos::make_pair(0,send_plan.
cnts(pid)), Kokkos::ALL());
368 GPTLstop(
"TRANSFER_DATA_SENDARR_SUB");
371 GPTLstart(
"TRANSFER_DATA_LOAD_SEND_BUFFER");
374 GPTLstop(
"TRANSFER_DATA_LOAD_SEND_BUFFER");
379 MPI_Recv(&ssignal, 1, MPI_INTEGER, pid, SIGNAL_TAG, comm, MPI_STATUS_IGNORE);
383 MPI_Isend(sendarr.data(), sendarr.size(), MPI_DOUBLE, pid, MSG_TAG, comm, &(vertex_buffer.
srequests[pid]));
386 MPI_Rsend(sendarr.data(), sendarr.size(), MPI_DOUBLE, pid, MSG_TAG, comm);
393 if (recv_plan.
cnts(pid) > 0){
395 MPI_Wait(&(vertex_buffer.
rrequests[pid]), MPI_STATUS_IGNORE);
401 return vertex_buffer;
KOKKOS_INLINE_FUNCTION int n_species() const
Definition: vgrid_distribution.hpp:187
void resize_n_vertices(int new_n_nodes)
Definition: vgrid_distribution.hpp:241
KOKKOS_INLINE_FUNCTION double & pull_node_index(int inode, int ip) const
Definition: vgrid_distribution.hpp:232
KOKKOS_INLINE_FUNCTION int n_vr() const
Definition: vgrid_distribution.hpp:191
KOKKOS_INLINE_FUNCTION int n_vz() const
Definition: vgrid_distribution.hpp:199
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
Definition: domain_decomposition.hpp:15
View< int *, CLayout, HostType > displs
Definition: domain_decomposition.hpp:17
View< int *, CLayout, HostType > cnts
Definition: domain_decomposition.hpp:16
int my_rank
Definition: domain_decomposition.hpp:18
Definition: my_mpi.hpp:19
int n_plane_ranks
Definition: my_mpi.hpp:40
int my_plane_rank
Definition: my_mpi.hpp:39
MPI_Comm plane_comm
Definition: my_mpi.hpp:38
Definition: transfer_vertex_data.hpp:225
VertexBuffer()
Definition: transfer_vertex_data.hpp:234
void unload(int vertex_offset, const Vs &... arrays) const
Definition: transfer_vertex_data.hpp:250
std::vector< View< double **, CLayout, Device > > send_buffers
Definition: transfer_vertex_data.hpp:230
VertexBuffer(int nnode_in, int n_dbl_per_vertex, int n_ranks)
Definition: transfer_vertex_data.hpp:236
std::vector< MPI_Request > srequests
Definition: transfer_vertex_data.hpp:231
void await_sends()
Definition: transfer_vertex_data.hpp:269
View< bool *, CLayout, HostType > sreq_assigned
Definition: transfer_vertex_data.hpp:232
View< bool *, CLayout, HostType > rreq_assigned
Definition: transfer_vertex_data.hpp:228
View< double **, CLayout, Device > view
Definition: transfer_vertex_data.hpp:226
int nnode() const
Definition: transfer_vertex_data.hpp:257
std::vector< MPI_Request > rrequests
Definition: transfer_vertex_data.hpp:227
void await_recvs()
Definition: transfer_vertex_data.hpp:261
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10
void load_buffer(int n, int new_offset, int old_offset, const View< double **, CLayout, HostType > &buffer, const Vs &... arrays)
Definition: transfer_vertex_data.hpp:152
int total_n_doubles_per_vertex(const V &first)
Definition: transfer_vertex_data.hpp:43
void load_arrays(int n, int new_offset, int old_offset, const View< double **, CLayout, HostType > &buffer, int &arr_offset, const V &array)
Definition: transfer_vertex_data.hpp:114
int get_sum_counts(const DistributionPlan &plan)
Definition: transfer_vertex_data.hpp:220
bool do_prepost_receive_requests(const DistributionPlan &plan)
Definition: transfer_vertex_data.hpp:199
void unload_arrays(int n, int vert_offset, const View< double **, CLayout, HostType > &buffer, int &arr_offset, const V &array)
Definition: transfer_vertex_data.hpp:165
int pair(int np, int p, int k)
Definition: transfer_vertex_data.hpp:6
KOKKOS_INLINE_FUNCTION double & vertex_access(const T &array, int i, int ip)
void resize_n_vertices(int nnodes, T &array)
VertexBuffer< HostType > transfer_data(const DistributionPlan &send_plan, const DistributionPlan &recv_plan, const MyMPI &mpi, bool async, const Vs &... arrays)
Definition: transfer_vertex_data.hpp:282
int n_doubles_per_vertex(const T &array)
int get_max_buf_size(const DistributionPlan &plan)
Definition: transfer_vertex_data.hpp:210
T::value_type sum_view(const T &view)
Definition: view_arithmetic.hpp:135