XGCa
async_reassignment.hpp
Go to the documentation of this file.
1 #ifndef ASYNC_REASSIGNMENT_HPP
2 #define ASYNC_REASSIGNMENT_HPP
3 
4 #include "timer_macro.hpp"
5 #include "space_settings.hpp"
6 #include "globals.hpp"
7 #include "species.hpp"
9 #include "distribution.hpp"
10 #include "col_grid.hpp"
11 #include "velocity_grid.hpp"
12 #include "view_arithmetic.hpp"
13 #include "transfer_vertex_data.hpp"
14 #include "collisions.hpp"
15 
// Fortran quicksort used below to obtain a sort permutation of per-rank timings.
// idx is a one-indexed permutation array; left/right are one-indexed bounds.
// NOTE(review): it is not visible from this header whether my_qsort_f only permutes
// idx or ALSO reorders psi_surf2_tmp (the values) in place — confirm in the Fortran
// source, since rebalance() indexes rank_time by original rank id after the call.
16 extern "C" void my_qsort_f(int left,int right,int* idx,float* psi_surf2_tmp,int npsi_surf2);
17 
19  static constexpr bool ASYNC_TRANSFER = true; // Use non-blocking transfers (awaited later in execute())
20  bool this_rank_sends_work; // Whether this rank sends work to another
21  bool this_rank_recvs_work; // Whether this rank receives work from another
// NOTE(review): members declared on original lines 22-25 (send_plan/recv_plan,
// res_send_plan/res_recv_plan, inp_buffer/res_buffer, assigned — per the
// cross-reference index at the bottom of this page) are elided in this extract.
26  int first_send_offset; // The offset of the first vertex sent from this rank, LOCAL index
27  int rank_sending_to_my_rank; // Algorithm currently assumes an underloaded rank can only receive work from one other
28  int recv_global_offset; // The global offset of the received work
29 
30  public:
31 
33 
34  // TOY PROBLEM
35  // set: send_plan, recv_plan, this_rank_sends_work, this_rank_recvs_work, rank_sending_to_my_rank, first_send_offset, recv_global_offset
// NOTE(review): the function signature (original line 36,
// void toy_problem_rebalance(const CollisionGrid<DeviceType>&, const DomainDecomposition<DeviceType>&)
// per the cross-reference index) is elided in this extract.
// Test/demo rebalance: the last plane rank ("big rank") is assumed overloaded and ships
// a fixed fraction of its excess vertices, split evenly across the first
// N_RANKS_RECEIVING plane ranks.
37  static constexpr int N_RANKS_RECEIVING = 7; // Testing
38 
39  int n_ranks = pol_decomp.mpi.n_plane_ranks;
40  int my_rank = pol_decomp.mpi.my_plane_rank;
41 
42  int big_rank = n_ranks-1; // Hard-coded choice: highest-index rank is the sender
43 
// gvid0_pid_h maps rank -> first owned global vertex id (one-indexed, Fortran convention)
44  int nnodes_on_big_rank = pol_decomp.gvid0_pid_h(big_rank+1) - pol_decomp.gvid0_pid_h(big_rank);
45  int nnodes_on_a_recv_rank = pol_decomp.gvid0_pid_h(1) - pol_decomp.gvid0_pid_h(0);
46  int excess_nodes = nnodes_on_big_rank - nnodes_on_a_recv_rank;
// Send N/(N+1) of the excess, i.e. keep 1/(N+1) of it on the big rank
47  float fraction_excess_to_send = N_RANKS_RECEIVING*1.0/(1.0+N_RANKS_RECEIVING);
48  int nnodes_to_send = excess_nodes*fraction_excess_to_send;
49 
50  // Round down to nearest N_RANKS_RECEIVING for convenience until offset info is sent too
51  nnodes_to_send /= N_RANKS_RECEIVING;
52  nnodes_to_send *= N_RANKS_RECEIVING;
53 
54  int nnodes_to_each = nnodes_to_send/N_RANKS_RECEIVING;
55 
56  if(my_rank==big_rank){
57  this_rank_sends_work = true;
58 
59  // Used to unload buffer
60  first_send_offset = nnodes_on_big_rank - nnodes_to_send;
61 
// One equal-size contiguous slab per receiving rank, packed at the tail of the domain
62  for(int i=0; i<N_RANKS_RECEIVING; i++){
63  send_plan.cnts(i) = nnodes_to_each;
64  send_plan.displs(i) = first_send_offset + i*nnodes_to_each;
65  }
66  }else if(my_rank<N_RANKS_RECEIVING){
67  this_rank_recvs_work = true;
68  rank_sending_to_my_rank = big_rank;
69 
70  recv_plan.cnts(rank_sending_to_my_rank) = nnodes_to_each;
// NOTE(review): original line 71 (presumably the recv_plan.displs assignment) is
// elided in this extract.
72 
// -1 converts the one-indexed gvid0_pid_h entry to a C (zero-indexed) global offset
73  int sending_rank_node_offset = pol_decomp.gvid0_pid_h(big_rank) - 1;
74  int offset_of_all_nnodes_sent = sending_rank_node_offset + nnodes_on_big_rank - nnodes_to_send;
75  recv_global_offset = offset_of_all_nnodes_sent + nnodes_to_each*recv_plan.my_rank;
76  }
77  }
78 
79  // Sets send_plan, recv_plan, this_rank_sends_work, this_rank_recvs_work, rank_sending_to_my_rank, first_send_offset, recv_global_offset
79  // Sets send_plan, recv_plan, this_rank_sends_work, this_rank_recvs_work, rank_sending_to_my_rank, first_send_offset, recv_global_offset
// Greedy rebalance based on measured per-vertex collision timings (col_grid.timing_all):
// ranks are sorted by total time; the most expensive rank peels vertices off the tail
// of its domain into "packets" handed to the cheapest ranks, until the sender drops
// near target_time or the receiver would exceed target_recv_time.
// NOTE(review): assigned_original is not used in this function body — confirm whether
// it should constrain which vertices are eligible to be sent.
80  void rebalance(const CollisionGrid<DeviceType>& col_grid, const DomainDecomposition<DeviceType>& pol_decomp, const VertexList& assigned_original){
81  int n_ranks = pol_decomp.mpi.n_plane_ranks;
82  int my_rank = pol_decomp.mpi.my_plane_rank;
83 
84  // Sum col_grid.timing_all for each rank
85  View<float*, HostType> rank_time("rank_time",n_ranks);
86  for(int i=0; i<n_ranks; i++){
// gvid0_pid_h is one-indexed (Fortran convention); -1 converts to C indexing
87  for(int inode=pol_decomp.gvid0_pid_h(i)-1; inode<pol_decomp.gvid0_pid_h(i+1)-1; inode++){
88  rank_time(i) += col_grid.timing_all(inode);
89  }
90  }
91 
92  // Get sort index of ranks
93  View<int*, HostType> sort_index_r(NoInit("sort_index"),n_ranks);
94  for(int i=0; i<n_ranks; i++) sort_index_r(i) = i + 1; // idx is one-indexed because it is reordered in Fortran
95  int left=1; int right=n_ranks;
// NOTE(review): if my_qsort_f reorders rank_time in place (not just sort_index_r),
// then the rank_time(isend)/rank_time(irecv) lookups below — which use ORIGINAL rank
// ids — would read a reordered array. Confirm against the Fortran implementation.
96  my_qsort_f(left,right,sort_index_r.data(),rank_time.data(),n_ranks);
// Reverse to descending order (most expensive rank first) and convert to zero-indexing
97  View<int*, HostType> sort_index(NoInit("sort_index"),n_ranks);
98  for(int i=0; i<n_ranks; i++) sort_index(i) = sort_index_r(n_ranks-1-i) - 1;
99 
100  // Get average time per rank
101  float average_time = sum_view(rank_time)/rank_time.size();
102 
103  // Adjust to more realistic target
104  constexpr float IMBALANCE_MAX = 1.2; // Senders only offload the time above this multiple of the average
105  float target_time = average_time*IMBALANCE_MAX;
106  float target_recv_time = average_time*1.1; // ? Receivers are filled only up to this multiple of the average
107 
108 if(is_rank_zero()){
109  printf("\nAsync collisions: average: %1.3e, target for sender: %1.3e, target for recver: %1.3e\n", average_time, target_time, target_recv_time);
110 }
111 
// Receivers are consumed from the cheap end of the sorted order; this index persists
// across senders so each receiver is used at most once
112  int irecv_sorted = n_ranks-1;
113 
114  constexpr int MIN_PACKET_SIZE = 8; // Don't bother sending fewer vertices
115  int last_packet_size = MIN_PACKET_SIZE;
116 
117  // Start with most expensive rank
118  for(int isend_sorted=0; isend_sorted<n_ranks; isend_sorted++){
119  int isend = sort_index(isend_sorted);
// Sender's owned global vertex range, converted from one-indexed gvid0_pid_h
120  int last_node = pol_decomp.gvid0_pid_h(isend+1) - 2;
121  int first_node = pol_decomp.gvid0_pid_h(isend) - 1;
122 
123  float time_to_send = rank_time(isend) - target_time;
124  int irecv = sort_index(irecv_sorted);
125  float recv_max = target_recv_time - rank_time(irecv);
126 
127  // Nothing to send
128  if(time_to_send<=0.0){
129  //continue;
130  // Since we are sorted by time, we can break from the loop rather than continue
131  break;
132  }
133 
134  // No space to receive. Since we are sorted by time, we can break from the loop rather than continue to next receiver
135  if(recv_max<=0.0){
136  break;
137  }
138 
// last_packet_size is carried over from the previous sender's final packet
139  if(last_packet_size<MIN_PACKET_SIZE){
140  break; // Assume that it's not worth sending a small packet, and that the loop is roughly organized by possible packet size (may not be true due to variations in n_iterations and n_subcycles)
141  }
142 
143  int current_n_to_send = 0;
144  int current_n_real_to_send = 0; // Counts only vertices with nonzero timing
145  float current_time = 0.0; // Time in the CURRENT packet; reset per packet
146  float cumul_time = 0.0; // Total time peeled off this sender; NOT reset per packet
147  // Work backwards loading nodes (must leave at least one node, so use > rather than >=
148  for(int inode=last_node; inode>first_node; inode--){ // This is GLOBAL index
149  // Add vertex to current packet
150  current_n_to_send++;
151  if(col_grid.timing_all(inode)!=0.0) current_n_real_to_send++;
152  current_time += col_grid.timing_all(inode);
153  cumul_time += col_grid.timing_all(inode);
154 
// Packet is closed when the sender has shed enough, the domain is exhausted
// (one node must remain), or the current receiver is full
155  bool done_sending = (current_time>=time_to_send || inode==first_node+1);
156  bool done_recving = (current_time>=recv_max);
157 
158  bool packet_complete = (done_sending || done_recving);
159 
160  if(packet_complete){
161  // If the packet is too small, break from the recv loop; the sender loop will break too
162  last_packet_size = current_n_real_to_send;
163  if(last_packet_size<MIN_PACKET_SIZE) break;
164 
165  // Assign the packet
166  if(my_rank==isend){
// inode is the LOWEST global index of the packet (loop runs backwards), so the
// packet spans [inode, inode+current_n_to_send) in global indices
167  send_plan.cnts(irecv) = current_n_to_send;
168  send_plan.displs(irecv) = inode - pol_decomp.node_offset;
169  this_rank_sends_work = true; // Updated repeatedly
170  first_send_offset = send_plan.displs(irecv); // Updated repeatedly, last is correct
171  }
172  if(my_rank==irecv){
173  recv_plan.cnts(isend) = current_n_to_send;
174  recv_plan.displs(isend) = 0;
175  this_rank_recvs_work = true;
176  rank_sending_to_my_rank = isend;
177  recv_global_offset = inode;
178  }
179 if(is_rank_zero()) printf("\n Packet: %d recv from %d: %d nds [%d-%d] (%d real) (time est: %1.3e; %1.3e->%1.3e, %1.3e->%1.3e)", irecv, isend, current_n_to_send,inode, inode+current_n_to_send, current_n_real_to_send, current_time, rank_time(irecv), rank_time(irecv)+current_time, rank_time(isend)-cumul_time+current_time, rank_time(isend)-cumul_time);
180 
181  // Move to next recver, *even if this one is not full*
// Receivers converge toward senders in sorted order; when they meet, irecv==isend
// has high rank_time, so recv_max<=0 terminates below before any out-of-range access
182  irecv_sorted -= 1;
183  irecv = sort_index(irecv_sorted);
184  recv_max = target_recv_time - rank_time(irecv);
185 
186  if(recv_max<=0.0) break; // Next receiver can't accept anything, we are done
187 
188  // Reset counters
189  current_n_to_send = 0;
190  current_n_real_to_send = 0;
191  current_time = 0.0;
192  }
193 
194  // If all nodes that need to be reassigned have been, move to next sender
195  if(done_sending) break;
196  } // recver loop
197  } // sender loop
198  }
199 
// Top-level planning step: zero-initializes the distribution plans, runs rebalance(),
// then adjusts this rank's vertex assignment (sender: truncates assigned_original;
// receiver: builds `assigned` from the sender's range) and constructs the reversed
// result-transfer plans.
200  void rebalance_plan(const CollisionGrid<DeviceType>& col_grid, const DomainDecomposition<DeviceType>& pol_decomp, VertexList& assigned_original){
201  int n_ranks = pol_decomp.mpi.n_plane_ranks;
202  int my_rank = pol_decomp.mpi.my_plane_rank;
203 
204  // Initialize plans to 0.0
// NOTE(review): cnts/displs are integer Views (per the cross-reference index);
// deep_copy with 0.0 relies on implicit double->int conversion — 0 would be clearer.
205  send_plan = DistributionPlan(my_rank, n_ranks);
206  recv_plan = DistributionPlan(my_rank, n_ranks);
207  Kokkos::deep_copy(send_plan.cnts, 0.0);
208  Kokkos::deep_copy(send_plan.displs, 0.0);
209  Kokkos::deep_copy(recv_plan.cnts, 0.0);
210  Kokkos::deep_copy(recv_plan.displs, 0.0);
211 
212  // Set send_plan, recv_plan, this_rank_sends_work, this_rank_recvs_work, rank_sending_to_my_rank, first_send_offset
213  //toy_problem_rebalance(col_grid, pol_decomp);
214  rebalance(col_grid, pol_decomp, assigned_original);
215 
216  // Adjust assignments
// NOTE(review): original line 217 is elided here — presumably
// if(this_rank_sends_work){ guarding the truncation below.
218  // Shorten assignment to not include the sent nodes
219  assigned_original = assigned_original & VertexList(pol_decomp.node_offset, pol_decomp.node_offset + first_send_offset);
220  }
// NOTE(review): original line 221 is elided here — presumably
// if(this_rank_recvs_work){ guarding the receiver-side assignment below.
222  GPTLstart("F_COL_ASSGN");
223  // Get sending rank's assignment
224  VertexList assigned_to_sender = col_grid.vertices & pol_decomp.vertex_list(rank_sending_to_my_rank);
225  //bool symmetric_f = false;
226  //if(symmetric_f || !pol_decomp.pol_decomp) mpi_split_assignments(pol_decomp, symmetric_f, assigned_to_sender); // BUG - uses wrong my_rank
227  GPTLstop("F_COL_ASSGN");
228 
229  GPTLstart("F_COL_ASSGN2");
// Intersect the sender's vertices with the global slab this rank was handed
230  int nodes_assigned = recv_plan.cnts(rank_sending_to_my_rank);
231  assigned = assigned_to_sender & VertexList(recv_global_offset, recv_global_offset+nodes_assigned);
232  GPTLstop("F_COL_ASSGN2");
233  }
234 
235  // result plan is reversed:
// Results flow back from receiver to sender, so the result plans swap the roles
// of the input plans' cnts/displs.
236  res_send_plan = DistributionPlan(my_rank, n_ranks);
237  res_recv_plan = DistributionPlan(my_rank, n_ranks);
238  Kokkos::deep_copy(res_send_plan.cnts, recv_plan.cnts);
239  Kokkos::deep_copy(res_recv_plan.cnts, send_plan.cnts);
240  Kokkos::deep_copy(res_send_plan.displs, recv_plan.displs);
241  Kokkos::deep_copy(res_recv_plan.displs, send_plan.displs);
242 
// NOTE(review): original line 243 is elided here (possibly a guard or nothing).
244  // res_recv_plan (and recv_plan) are mappings into a buffer, so start at 0.0
// NOTE(review): the loop body (original line 246, presumably rebasing
// res_recv_plan.displs(i) to buffer-relative offsets) is elided in this extract.
245  for(int i=0; i<res_recv_plan.displs.size(); i++){
247  }
248  }
249  }
250 
// NOTE(review): this is the AsyncReassignment constructor (original line 251,
// AsyncReassignment(const DomainDecomposition&, const CollisionGrid&,
// const CollisionSpecies&, const VGridDistribution&, ...) per the cross-reference
// index); its first signature line and opening brace context are elided in this extract.
// It plans the rebalance, then posts the input transfers (receives before sends so
// receivers are ready when senders post).
252  const View<double*,CLayout,HostType>& node_cost, VertexList& assigned_original)
254  {
255  // Choose rebalance plan and split assignment
256  GPTLstart("ASYNC_COL_REBALANCE");
257  rebalance_plan(col_grid, pol_decomp, assigned_original);
258  GPTLstop("ASYNC_COL_REBALANCE");
259 
// Invariant check: a rank must not appear as both sender and receiver
260  if(this_rank_sends_work && this_rank_recvs_work) exit_XGC("\nError: Cannot send and receive work\n");
261 
// NOTE(review): original line 262 is elided here — presumably
// if(this_rank_recvs_work){ guarding the receive below.
263  GPTLstart("ASYNC_COL_RECV_INPUTS");
264  // irecv excess vertices
265  inp_buffer = transfer_data(send_plan, recv_plan, pol_decomp.mpi, ASYNC_TRANSFER, col_spall.den_moment, col_spall.temp_moment, col_spall.f, col_spall.fg_temp_ev_all);
266  GPTLstop("ASYNC_COL_RECV_INPUTS");
267  }
268 
// NOTE(review): original line 269 is elided here — presumably
// if(this_rank_sends_work){ guarding the send below.
270  GPTLstart("ASYNC_COL_SEND_INPUTS");
271  // Send excess vertices
272  inp_buffer = transfer_data(send_plan, recv_plan, pol_decomp.mpi, ASYNC_TRANSFER, col_spall.den_moment, col_spall.temp_moment, col_spall.f, col_spall.fg_temp_ev_all);
273  GPTLstop("ASYNC_COL_SEND_INPUTS");
274 
// With async transfers, the sender pre-posts the receive for its own results now
275  if(ASYNC_TRANSFER){
276  GPTLstart("ASYNC_COL_RECV_RESULTS");
277  // Irecv results
278  res_buffer = transfer_data(res_send_plan, res_recv_plan, pol_decomp.mpi, ASYNC_TRANSFER, df0g_tmp, node_cost);
279  GPTLstop("ASYNC_COL_RECV_RESULTS");
280  }
281  }
282  }
283 
// Executes the reassigned collision work: receivers await the transferred inputs,
// run subcycle_collisions on the borrowed vertices, and send results back; senders
// await their input sends and then receive/unload the computed results into
// df0g_tmp / node_cost at first_send_offset.
284  void execute(const CollisionGrid<DeviceType>& col_grid, double dt, const DomainDecomposition<DeviceType>& pol_decomp, const CollisionSpecies<DeviceType>& col_spall, const View<int*,CLayout,HostType>& converged_all, const VGridDistribution<HostType>& df0g_tmp,
285  const View<double*,CLayout,HostType>& node_cost){
// NOTE(review): original line 286 is elided here — presumably
// if(this_rank_recvs_work){ opening the receiver branch.
287  GPTLstart("ASYNC_COL_AWAIT_INPUTS");
288  // Wait for the data to arrive from other rank
// NOTE(review): original line 289 is elided — presumably inp_buffer.await_recvs();
290  GPTLstop("ASYNC_COL_AWAIT_INPUTS");
291 
292  // Resize decomposed allocations whose data was placed in the buffer
293  int nnode = inp_buffer.nnode();
294  GPTLstart("ASYNC_COL_SPALL2");
295  CollisionSpecies<DeviceType> col_spall2(col_spall, nnode);
296  GPTLstop("ASYNC_COL_SPALL2");
297  GPTLstart("ASYNC_COL_UNLOAD_INPUTS");
298  inp_buffer.unload(0, col_spall2.den_moment, col_spall2.temp_moment, col_spall2.f, col_spall2.fg_temp_ev_all);
299  GPTLstop("ASYNC_COL_UNLOAD_INPUTS");
300 
301  // Allocate df0g_tmp and set to zero to ensure that skipped/unconverged nodes don't contribute
302  GPTLstart("ASYNC_COL_DF0G2");
303  VGridDistribution<HostType> df0g_tmp2(col_spall2.f.n_species(), col_spall2.f);
304  GPTLstop("ASYNC_COL_DF0G2");
305 
306  // Set up some required inputs/outputs
// Views of the global per-node arrays restricted to the received slab
307  int node_offset = recv_global_offset;
308  const View<int*,CLayout,HostType> converged_local = Kokkos::subview(converged_all, Kokkos::make_pair(node_offset,node_offset+nnode));
309  const View<int*,CLayout,HostType> n_subcycles_local = Kokkos::subview(col_grid.n_subcycles, Kokkos::make_pair(node_offset,node_offset+nnode));
310  GPTLstart("ASYNC_COL_NC2");
311  View<double*,CLayout,HostType> node_cost2("node_cost2", nnode);
312  GPTLstop("ASYNC_COL_NC2");
313 
314  GPTLstart("F_COL_SHIFT_ASSGN");
315  // Shift to correspond to local nodes
316  assigned.shift(-node_offset);
317  GPTLstop("F_COL_SHIFT_ASSGN");
318 
319  // Run newly assigned work
320  GPTLstart("ASYNC_COL_SUBCYCLE");
321  subcycle_collisions(col_grid, col_spall2, dt, n_subcycles_local, assigned, converged_local, df0g_tmp2, node_cost2);
322  GPTLstop("ASYNC_COL_SUBCYCLE");
323 
324  // Irecv results. Note for results, send_plan and recv_plan are reversed
325  // No need to send converged_local, because converged_all is all-reduced below
326  GPTLstart("ASYNC_COL_SEND_RESULTS");
327  res_buffer = transfer_data(res_send_plan, res_recv_plan, pol_decomp.mpi, ASYNC_TRANSFER, df0g_tmp2, node_cost2);
328  GPTLstop("ASYNC_COL_SEND_RESULTS");
329 
330  // Wait on sends, seems unavoidable
331  GPTLstart("ASYNC_COL_AWAIT_RES_SENDS");
// NOTE(review): original line 332 is elided — presumably res_buffer.await_sends();
333  GPTLstop("ASYNC_COL_AWAIT_RES_SENDS");
334  }
335 
// NOTE(review): original line 336 is elided here — presumably
// if(this_rank_sends_work){ opening the sender branch.
337  // Make sure sends are complete. This could/should be done earlier (mid-collisions) but not sure how right now. Probably need to reserve a thread for that
338  GPTLstart("ASYNC_COL_AWAIT_INP_SENDS");
// NOTE(review): original line 339 is elided — presumably inp_buffer.await_sends();
340  GPTLstop("ASYNC_COL_AWAIT_INP_SENDS");
341 
// Blocking mode: the result receive was not pre-posted in the constructor, so post it now
342  if(!ASYNC_TRANSFER){
343  GPTLstart("ASYNC_COL_RECV_RESULTS");
344  // Irecv results
345  res_buffer = transfer_data(res_send_plan, res_recv_plan, pol_decomp.mpi, ASYNC_TRANSFER, df0g_tmp, node_cost);
346  GPTLstop("ASYNC_COL_RECV_RESULTS");
347  }
348 
349  // Wait for the results to arrive from other rank
350  GPTLstart("ASYNC_COL_AWAIT_RESULTS");
// NOTE(review): original line 351 is elided — presumably res_buffer.await_recvs();
352  GPTLstop("ASYNC_COL_AWAIT_RESULTS");
353 
354  // Unload into local result arrays
// first_send_offset is the local index of the first vertex that was sent away,
// so results land back in the tail of this rank's arrays
355  GPTLstart("ASYNC_COL_UNLOAD_RESULTS");
356  res_buffer.unload(first_send_offset, df0g_tmp, node_cost);
357  GPTLstop("ASYNC_COL_UNLOAD_RESULTS");
358  }
359  }
360 };
361 
362 #endif
void my_qsort_f(int left, int right, int *idx, float *psi_surf2_tmp, int npsi_surf2)
Definition: async_reassignment.hpp:18
DistributionPlan send_plan
Definition: async_reassignment.hpp:22
DistributionPlan res_recv_plan
Definition: async_reassignment.hpp:23
int rank_sending_to_my_rank
Definition: async_reassignment.hpp:27
AsyncReassignment(const DomainDecomposition< DeviceType > &pol_decomp, const CollisionGrid< DeviceType > &col_grid, const CollisionSpecies< DeviceType > &col_spall, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost, VertexList &assigned_original)
Definition: async_reassignment.hpp:251
void execute(const CollisionGrid< DeviceType > &col_grid, double dt, const DomainDecomposition< DeviceType > &pol_decomp, const CollisionSpecies< DeviceType > &col_spall, const View< int *, CLayout, HostType > &converged_all, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost)
Definition: async_reassignment.hpp:284
AsyncReassignment()
Definition: async_reassignment.hpp:32
void rebalance(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp, const VertexList &assigned_original)
Definition: async_reassignment.hpp:80
bool this_rank_sends_work
Definition: async_reassignment.hpp:20
DistributionPlan recv_plan
Definition: async_reassignment.hpp:22
bool this_rank_recvs_work
Definition: async_reassignment.hpp:21
static constexpr bool ASYNC_TRANSFER
Definition: async_reassignment.hpp:19
VertexBuffer< HostType > inp_buffer
Definition: async_reassignment.hpp:24
int recv_global_offset
Definition: async_reassignment.hpp:28
void toy_problem_rebalance(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp)
Definition: async_reassignment.hpp:36
VertexBuffer< HostType > res_buffer
Definition: async_reassignment.hpp:24
VertexList assigned
Definition: async_reassignment.hpp:25
void rebalance_plan(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp, VertexList &assigned_original)
Definition: async_reassignment.hpp:200
DistributionPlan res_send_plan
Definition: async_reassignment.hpp:23
int first_send_offset
Definition: async_reassignment.hpp:26
VertexList vertices
Definition: col_grid.hpp:135
View< float *, CLayout, HostType > timing_all
Definition: col_grid.hpp:133
View< int *, CLayout, HostType > n_subcycles
Number of subcycles for each vertex.
Definition: col_grid.hpp:132
Definition: col_species.hpp:105
View< double **, CLayout, HostType > den_moment
Definition: col_species.hpp:119
View< double **, CLayout, HostType > temp_moment
Definition: col_species.hpp:120
const VGridDistribution< HostType > f
Definition: col_species.hpp:107
std::vector< View< double *, CLayout, HostType > > fg_temp_ev_all
Definition: col_species.hpp:123
VertexList vertex_list() const
Definition: domain_decomposition.cpp:279
Kokkos::View< int *, Kokkos::LayoutRight, HostType > gvid0_pid_h
Which processors get which vertices (host)
Definition: domain_decomposition.hpp:95
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:91
KOKKOS_INLINE_FUNCTION int n_species() const
Definition: vgrid_distribution.hpp:187
Definition: vertex_list.hpp:53
void shift(int shift_val)
Definition: vertex_list.cpp:246
void subcycle_collisions(const CollisionGrid< DeviceType > &col_grid, const CollisionSpecies< DeviceType > &col_spall, double dt, const View< int *, CLayout, HostType > &n_subcycles_local, VertexList &assigned, const View< int *, CLayout, HostType > &converged_local, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost)
Definition: collisions.cpp:188
void exit_XGC(std::string msg)
Definition: globals.hpp:37
bool is_rank_zero()
Definition: globals.hpp:27
logical false
Definition: module.F90:102
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
Definition: domain_decomposition.hpp:15
View< int *, CLayout, HostType > displs
Definition: domain_decomposition.hpp:17
View< int *, CLayout, HostType > cnts
Definition: domain_decomposition.hpp:16
int my_rank
Definition: domain_decomposition.hpp:18
void unload(int vertex_offset, const Vs &... arrays) const
Definition: transfer_vertex_data.hpp:250
void await_sends()
Definition: transfer_vertex_data.hpp:269
int nnode() const
Definition: transfer_vertex_data.hpp:257
void await_recvs()
Definition: transfer_vertex_data.hpp:261
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10
VertexBuffer< HostType > transfer_data(const DistributionPlan &send_plan, const DistributionPlan &recv_plan, const MyMPI &mpi, bool async, const Vs &... arrays)
Definition: transfer_vertex_data.hpp:282
T::value_type sum_view(const T &view)
Definition: view_arithmetic.hpp:135