1 #ifndef ASYNC_REASSIGNMENT_HPP
2 #define ASYNC_REASSIGNMENT_HPP
16 extern "C" void my_qsort_f(
int left,
int right,
int* idx,
float* psi_surf2_tmp,
int npsi_surf2);
37 static constexpr
int N_RANKS_RECEIVING = 7;
39 int n_ranks = pol_decomp.mpi.n_plane_ranks;
40 int my_rank = pol_decomp.mpi.my_plane_rank;
42 int big_rank = n_ranks-1;
46 int excess_nodes = nnodes_on_big_rank - nnodes_on_a_recv_rank;
47 float fraction_excess_to_send = N_RANKS_RECEIVING*1.0/(1.0+N_RANKS_RECEIVING);
48 int nnodes_to_send = excess_nodes*fraction_excess_to_send;
51 nnodes_to_send /= N_RANKS_RECEIVING;
52 nnodes_to_send *= N_RANKS_RECEIVING;
54 int nnodes_to_each = nnodes_to_send/N_RANKS_RECEIVING;
56 if(my_rank==big_rank){
62 for(
int i=0; i<N_RANKS_RECEIVING; i++){
66 }
else if(my_rank<N_RANKS_RECEIVING){
73 int sending_rank_node_offset = pol_decomp.
gvid0_pid_h(big_rank) - 1;
74 int offset_of_all_nnodes_sent = sending_rank_node_offset + nnodes_on_big_rank - nnodes_to_send;
81 int n_ranks = pol_decomp.mpi.n_plane_ranks;
82 int my_rank = pol_decomp.mpi.my_plane_rank;
85 View<float*, HostType> rank_time(
"rank_time",n_ranks);
86 for(
int i=0; i<n_ranks; i++){
93 View<int*, HostType> sort_index_r(
NoInit(
"sort_index"),n_ranks);
94 for(
int i=0; i<n_ranks; i++) sort_index_r(i) = i + 1;
95 int left=1;
int right=n_ranks;
96 my_qsort_f(left,right,sort_index_r.data(),rank_time.data(),n_ranks);
97 View<int*, HostType> sort_index(
NoInit(
"sort_index"),n_ranks);
98 for(
int i=0; i<n_ranks; i++) sort_index(i) = sort_index_r(n_ranks-1-i) - 1;
101 float average_time =
sum_view(rank_time)/rank_time.size();
104 constexpr
float IMBALANCE_MAX = 1.2;
105 float target_time = average_time*IMBALANCE_MAX;
106 float target_recv_time = average_time*1.1;
109 printf(
"\nAsync collisions: average: %1.3e, target for sender: %1.3e, target for recver: %1.3e\n", average_time, target_time, target_recv_time);
112 int irecv_sorted = n_ranks-1;
114 constexpr
int MIN_PACKET_SIZE = 8;
115 int last_packet_size = MIN_PACKET_SIZE;
118 for(
int isend_sorted=0; isend_sorted<n_ranks; isend_sorted++){
119 int isend = sort_index(isend_sorted);
120 int last_node = pol_decomp.
gvid0_pid_h(isend+1) - 2;
121 int first_node = pol_decomp.
gvid0_pid_h(isend) - 1;
123 float time_to_send = rank_time(isend) - target_time;
124 int irecv = sort_index(irecv_sorted);
125 float recv_max = target_recv_time - rank_time(irecv);
128 if(time_to_send<=0.0){
139 if(last_packet_size<MIN_PACKET_SIZE){
143 int current_n_to_send = 0;
144 int current_n_real_to_send = 0;
145 float current_time = 0.0;
146 float cumul_time = 0.0;
148 for(
int inode=last_node; inode>first_node; inode--){
151 if(col_grid.
timing_all(inode)!=0.0) current_n_real_to_send++;
155 bool done_sending = (current_time>=time_to_send || inode==first_node+1);
156 bool done_recving = (current_time>=recv_max);
158 bool packet_complete = (done_sending || done_recving);
162 last_packet_size = current_n_real_to_send;
163 if(last_packet_size<MIN_PACKET_SIZE)
break;
179 if(
is_rank_zero()) printf(
"\n Packet: %d recv from %d: %d nds [%d-%d] (%d real) (time est: %1.3e; %1.3e->%1.3e, %1.3e->%1.3e)", irecv, isend, current_n_to_send,inode, inode+current_n_to_send, current_n_real_to_send, current_time, rank_time(irecv), rank_time(irecv)+current_time, rank_time(isend)-cumul_time+current_time, rank_time(isend)-cumul_time);
183 irecv = sort_index(irecv_sorted);
184 recv_max = target_recv_time - rank_time(irecv);
186 if(recv_max<=0.0)
break;
189 current_n_to_send = 0;
190 current_n_real_to_send = 0;
195 if(done_sending)
break;
201 int n_ranks = pol_decomp.mpi.n_plane_ranks;
202 int my_rank = pol_decomp.mpi.my_plane_rank;
214 rebalance(col_grid, pol_decomp, assigned_original);
252 const View<double*,CLayout,HostType>& node_cost,
VertexList& assigned_original)
285 const View<double*,CLayout,HostType>& node_cost){
299 GPTLstop(
"ASYNC_COL_UNLOAD_INPUTS");
308 const View<int*,CLayout,HostType> converged_local = Kokkos::subview(converged_all, Kokkos::make_pair(node_offset,node_offset+nnode));
309 const View<int*,CLayout,HostType> n_subcycles_local = Kokkos::subview(col_grid.
n_subcycles, Kokkos::make_pair(node_offset,node_offset+nnode));
311 View<double*,CLayout,HostType> node_cost2(
"node_cost2", nnode);
333 GPTLstop(
"ASYNC_COL_AWAIT_RES_SENDS");
340 GPTLstop(
"ASYNC_COL_AWAIT_INP_SENDS");
352 GPTLstop(
"ASYNC_COL_AWAIT_RESULTS");
357 GPTLstop(
"ASYNC_COL_UNLOAD_RESULTS");
void my_qsort_f(int left, int right, int *idx, float *psi_surf2_tmp, int npsi_surf2)
Definition: async_reassignment.hpp:18
DistributionPlan send_plan
Definition: async_reassignment.hpp:22
DistributionPlan res_recv_plan
Definition: async_reassignment.hpp:23
int rank_sending_to_my_rank
Definition: async_reassignment.hpp:27
AsyncReassignment(const DomainDecomposition< DeviceType > &pol_decomp, const CollisionGrid< DeviceType > &col_grid, const CollisionSpecies< DeviceType > &col_spall, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost, VertexList &assigned_original)
Definition: async_reassignment.hpp:251
void execute(const CollisionGrid< DeviceType > &col_grid, double dt, const DomainDecomposition< DeviceType > &pol_decomp, const CollisionSpecies< DeviceType > &col_spall, const View< int *, CLayout, HostType > &converged_all, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost)
Definition: async_reassignment.hpp:284
AsyncReassignment()
Definition: async_reassignment.hpp:32
void rebalance(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp, const VertexList &assigned_original)
Definition: async_reassignment.hpp:80
bool this_rank_sends_work
Definition: async_reassignment.hpp:20
DistributionPlan recv_plan
Definition: async_reassignment.hpp:22
bool this_rank_recvs_work
Definition: async_reassignment.hpp:21
static constexpr bool ASYNC_TRANSFER
Definition: async_reassignment.hpp:19
VertexBuffer< HostType > inp_buffer
Definition: async_reassignment.hpp:24
int recv_global_offset
Definition: async_reassignment.hpp:28
void toy_problem_rebalance(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp)
Definition: async_reassignment.hpp:36
VertexBuffer< HostType > res_buffer
Definition: async_reassignment.hpp:24
VertexList assigned
Definition: async_reassignment.hpp:25
void rebalance_plan(const CollisionGrid< DeviceType > &col_grid, const DomainDecomposition< DeviceType > &pol_decomp, VertexList &assigned_original)
Definition: async_reassignment.hpp:200
DistributionPlan res_send_plan
Definition: async_reassignment.hpp:23
int first_send_offset
Definition: async_reassignment.hpp:26
VertexList vertices
Definition: col_grid.hpp:135
View< float *, CLayout, HostType > timing_all
Definition: col_grid.hpp:133
View< int *, CLayout, HostType > n_subcycles
Number of subcycles for each vertex.
Definition: col_grid.hpp:132
Definition: col_species.hpp:105
View< double **, CLayout, HostType > den_moment
Definition: col_species.hpp:119
View< double **, CLayout, HostType > temp_moment
Definition: col_species.hpp:120
const VGridDistribution< HostType > f
Definition: col_species.hpp:107
std::vector< View< double *, CLayout, HostType > > fg_temp_ev_all
Definition: col_species.hpp:123
VertexList vertex_list() const
Definition: domain_decomposition.cpp:279
Kokkos::View< int *, Kokkos::LayoutRight, HostType > gvid0_pid_h
Which processors get which vertices (host)
Definition: domain_decomposition.hpp:95
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:91
KOKKOS_INLINE_FUNCTION int n_species() const
Definition: vgrid_distribution.hpp:187
Definition: vertex_list.hpp:53
void shift(int shift_val)
Definition: vertex_list.cpp:246
void subcycle_collisions(const CollisionGrid< DeviceType > &col_grid, const CollisionSpecies< DeviceType > &col_spall, double dt, const View< int *, CLayout, HostType > &n_subcycles_local, VertexList &assigned, const View< int *, CLayout, HostType > &converged_local, const VGridDistribution< HostType > &df0g_tmp, const View< double *, CLayout, HostType > &node_cost)
Definition: collisions.cpp:188
void exit_XGC(std::string msg)
Definition: globals.hpp:37
bool is_rank_zero()
Definition: globals.hpp:27
logical false
Definition: module.F90:102
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:69
Definition: domain_decomposition.hpp:15
View< int *, CLayout, HostType > displs
Definition: domain_decomposition.hpp:17
View< int *, CLayout, HostType > cnts
Definition: domain_decomposition.hpp:16
int my_rank
Definition: domain_decomposition.hpp:18
void unload(int vertex_offset, const Vs &... arrays) const
Definition: transfer_vertex_data.hpp:250
void await_sends()
Definition: transfer_vertex_data.hpp:269
int nnode() const
Definition: transfer_vertex_data.hpp:257
void await_recvs()
Definition: transfer_vertex_data.hpp:261
static int GPTLstart(const char *name)
Definition: timer_macro.hpp:9
static int GPTLstop(const char *name)
Definition: timer_macro.hpp:10
VertexBuffer< HostType > transfer_data(const DistributionPlan &send_plan, const DistributionPlan &recv_plan, const MyMPI &mpi, bool async, const Vs &... arrays)
Definition: transfer_vertex_data.hpp:282
T::value_type sum_view(const T &view)
Definition: view_arithmetic.hpp:135