XGCa
field_decomposition.hpp
#ifndef FIELD_DECOMPOSITION_HPP
#define FIELD_DECOMPOSITION_HPP
#include "space_settings.hpp"
#include "NamelistReader.hpp"
#include "globals.hpp"
#ifdef USE_MPI
# include "my_mpi.hpp"
#endif

// Field decomposition class
template<class Device>
class FieldDecomposition{
 public:

#ifdef USE_MPI
  // MPI communicators etc.
  MyMPI mpi;
#endif

  // Constants
  int n_ranks;          ///< Number of ranks the field will be divided between
  int n_phi_domains;    ///< Number of domains in the phi direction
  int n_pol_domains;    ///< Number of domains in the poloidal plane
  int n_ghost_planes;   ///< Number of ghost planes on each side of the domain
  int n_ghost_vertices; ///< Number of ghost vertices on each side of the domain

  int first_owned_node;  ///< First mesh node belonging to this rank, NOT including ghost nodes
  int nnodes_owned;      ///< Number of nodes belonging to this rank, NOT including ghost nodes
  int first_owned_plane; ///< First plane belonging to this rank, NOT including ghost planes
  int nplanes_owned;     ///< Number of planes belonging to this rank, NOT including ghost planes

  int first_node;  ///< First mesh node belonging to this rank, including ghost nodes
  int last_node;   ///< Last node belonging to this rank, including ghost nodes
  int n_nodes;     ///< Number of nodes belonging to this rank, including ghost nodes
  int first_plane; ///< First plane belonging to this rank, including ghost planes
  int last_plane;  ///< Last plane belonging to this rank, including ghost planes
  int n_planes;    ///< Number of planes belonging to this rank, including ghost planes

  bool use_near_field; ///< For better load balancing, have each rank retain the field near its global domain decomposition in addition to its secondary domain

  // Views
  View<int*,CLayout,HostType> map_from_global_intpl; ///< Rank in this communicator for each global intpl rank
  View<int*,CLayout,HostType> all_first_node;  ///< First node of each rank
  View<int*,CLayout,HostType> all_last_node;   ///< Last node of each rank
  View<int*,CLayout,HostType> all_first_plane; ///< First plane of each rank
  View<int*,CLayout,HostType> all_last_plane;  ///< Last plane of each rank

  // Default constructor
  FieldDecomposition(){}

  // Constructor
  FieldDecomposition(NLReader::NamelistReader& nlr, int nplanes, int nnodes){
#ifdef USE_MPI
    nlr.use_namelist("field_decomp_param");
    n_ranks = nlr.get<int>("n_ranks", 6); // Number of ranks across which the fields are decomposed. The larger this number, the less memory is used per rank; for best performance, however, it should be as small as possible.
    n_phi_domains = nlr.get<int>("n_phi_domains", n_ranks); // Number of subdomains in the toroidal direction. By default, there is no poloidal decomposition.
    n_ghost_planes = nlr.get<int>("n_ghost_planes", 1); // Number of ghost planes on each side of a subdomain. Toroidal overlap is not required, but may be useful for performance since it reduces the need to shift particles between ranks.
    n_ghost_vertices = nlr.get<int>("n_ghost_vertices", 3000); // Number of ghost vertices on each side of the subdomain within the poloidal plane. It must be large enough that all neighboring vertices of the subdomain are present; how many that takes depends entirely on the grid, since there is no guarantee on the ordering of vertices in an XGC grid.
    use_near_field = nlr.get<bool>("use_near_field", false); // For better load balancing, have each rank retain the field near its global domain decomposition in addition to its secondary domain

#ifdef STELLARATOR
    // For stellarators, the setup is currently forced: field_decomp.mpi.comm = pol_decomp.mpi.intpl_comm
    // Other options may be possible later
    n_ranks = nplanes;       // The field is divided so that each rank has the field data of its plane
    n_phi_domains = n_ranks; // No poloidal decomposition
    n_ghost_planes = 0;      // No ghost planes
    n_ghost_vertices = 0;    // No ghost vertices (the full plane is present)
    use_near_field = false;  // No near field is necessary since field_decomp.mpi.comm = pol_decomp.mpi.intpl_comm
#endif

    // If there is no phi decomposition, don't use ghost planes
    if(n_phi_domains==1) n_ghost_planes = 0;

    n_pol_domains = n_ranks/n_phi_domains; // Number of poloidal domains is inferred

    // Temporarily create a new comm which is SML_COMM_WORLD split into contiguous sets of "n_ranks" processes
    MPI_Comm comm;
    MPI_Comm_split(SML_COMM_WORLD, SML_COMM_RANK/n_ranks, SML_COMM_RANK, &comm);
    // Split up comm using the same class as the standard decomposition (mpi.comm, mpi.plane_comm and mpi.intpl_comm)
    mpi = MyMPI(comm, n_phi_domains);

    all_first_node = View<int*,CLayout,HostType>("all_first_node",n_ranks);
    all_last_node = View<int*,CLayout,HostType>("all_last_node",n_ranks);
    all_first_plane = View<int*,CLayout,HostType>("all_first_plane",n_ranks);
    all_last_plane = View<int*,CLayout,HostType>("all_last_plane",n_ranks);

    // Some checks
    if(n_pol_domains*n_phi_domains != n_ranks)
      exit_XGC("\nError in field_decomposition: n_ranks must be divisible by n_phi_domains");

    // This restriction could be relaxed
    int nplanes_per_phi_domain = nplanes/n_phi_domains;
    if(nplanes_per_phi_domain*n_phi_domains != nplanes)
      exit_XGC("\nError in field_decomposition: n_phi_domains must divide evenly into nplanes");

    if(mpi.nranks != n_ranks)
      exit_XGC("\nError in field_decomposition: total XGC ranks must be divisible by field_decomp_param n_ranks");

    // Go through each domain and determine its size/offset
    // Planes are contiguous in the communicator, so they are the inner loop
    // Everything here is ZERO-INDEXED
    int nodes_per_pol_domain = nnodes/n_pol_domains;  // FLOOR
    int nodes_per_phi_domain = nplanes/n_phi_domains; // FLOOR
    int i = 0;
    for(int i_pol=0; i_pol<n_pol_domains; i_pol++){
      int first_owned_node_r = nodes_per_pol_domain*i_pol;
      int n_owned_nodes_r = (i_pol==n_pol_domains-1 ? (nnodes - first_owned_node_r) : nodes_per_pol_domain);
      for(int i_phi=0; i_phi<n_phi_domains; i_phi++){
        int first_owned_plane_r = nodes_per_phi_domain*i_phi;
        int n_owned_planes_r = (i_phi==n_phi_domains-1 ? (nplanes - first_owned_plane_r) : nodes_per_phi_domain);

        // Add modulo'd ghost cells for phi, cut off ghost cells for pol
        all_first_node(i) = std::max(first_owned_node_r - n_ghost_vertices, 0);
        all_last_node(i) = std::min(first_owned_node_r + n_owned_nodes_r - 1 + n_ghost_vertices, nnodes-1);

        // If the owned planes plus ghost planes end up covering the entire torus
        if((n_owned_planes_r + 2*n_ghost_planes) >= nplanes){
          all_first_plane(i) = 0;
          all_last_plane(i) = nplanes-1;
        }else{
          all_first_plane(i) = positive_modulo(first_owned_plane_r - n_ghost_planes,nplanes);
          all_last_plane(i) = (first_owned_plane_r + n_owned_planes_r - 1 + n_ghost_planes)%nplanes;
        }

        // Finally, set for this rank
        if(i==mpi.my_rank){
          first_owned_node = first_owned_node_r;
          nnodes_owned = n_owned_nodes_r;
          first_owned_plane = first_owned_plane_r;
          nplanes_owned = n_owned_planes_r;

          first_node = all_first_node(i);
          last_node = all_last_node(i);
          n_nodes = last_node - first_node + 1;
          first_plane = all_first_plane(i);
          last_plane = all_last_plane(i);
          n_planes = positive_modulo(last_plane - first_plane, nplanes) + 1;
        }

        // Advance to the next rank
        i++;
      }
    }
#endif
  }

  // Returns the rank (in the field decomposition communicator) whose owned subdomain contains the given plane and node
  KOKKOS_INLINE_FUNCTION int find_domain_owner(int global_plane_index, int nplanes_total, int global_node_index, int nnodes_total) const{
    // These are enforced to be divisible
    int planes_per_phi_domain = nplanes_total/n_phi_domains;
    int intpl_pid = global_plane_index/planes_per_phi_domain;

    // Poloidal decomposition should work out despite not necessarily being divisible
    int n_vertices_per_pol_domain = nnodes_total/n_pol_domains;
    int plane_pid = global_node_index/n_vertices_per_pol_domain;

    // Calculate global pid from plane and intpl coordinates
    return (intpl_pid + n_phi_domains*plane_pid);
  }

  // Number of nodes held by the given rank, including ghost nodes
  int all_n_nodes(int local_pid) const{
    return all_last_node(local_pid) - all_first_node(local_pid) + 1;
  }

  // Number of planes held by the given rank, including ghost planes (accounts for toroidal wrap-around)
  int all_n_planes(int local_pid, int nplanes) const{
    return positive_modulo(all_last_plane(local_pid) - all_first_plane(local_pid), nplanes) + 1;
  }
};

#endif
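
As a rough illustration of how find_domain_owner maps a (plane, vertex) pair to a rank, the following standalone sketch reproduces the same integer arithmetic outside of XGC. The configuration values (8 ranks, 4 toroidal domains, 8 planes, 1000 vertices) are made up for the example and are not XGC defaults; the lambda is a local stand-in, not the class method itself.

// Standalone illustration (not part of XGC): the owner-rank arithmetic from
// find_domain_owner(), with assumed sizes chosen so the divisions are exact.
#include <cstdio>

int main(){
  const int n_ranks = 8, n_phi_domains = 4, n_pol_domains = n_ranks/n_phi_domains;
  const int nplanes_total = 8, nnodes_total = 1000;

  // Same arithmetic as FieldDecomposition::find_domain_owner
  auto find_domain_owner = [&](int global_plane_index, int global_node_index){
    int planes_per_phi_domain = nplanes_total/n_phi_domains;      // 2 planes per toroidal domain
    int intpl_pid = global_plane_index/planes_per_phi_domain;     // toroidal coordinate
    int n_vertices_per_pol_domain = nnodes_total/n_pol_domains;   // 500 vertices per poloidal domain (FLOOR)
    int plane_pid = global_node_index/n_vertices_per_pol_domain;  // poloidal coordinate
    return intpl_pid + n_phi_domains*plane_pid;                   // toroidal index is the fast index
  };

  // Plane 5, node 700 -> intpl_pid=2, plane_pid=1 -> rank 6
  printf("owner of (plane 5, node 700) = %d\n", find_domain_owner(5, 700));
  // Plane 0, node 0 -> rank 0
  printf("owner of (plane 0, node 0)   = %d\n", find_domain_owner(0, 0));
  return 0;
}

This matches the rank ordering used in the constructor, where the counter i advances with i_phi in the inner loop, so rank = i_phi + n_phi_domains*i_pol.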
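
The toroidal ghost-plane bookkeeping treats plane indices as positions on a ring, which is why positive_modulo appears both in the constructor and in all_n_planes. A minimal sketch of that wrap-around, again with made-up sizes (8 planes, 4 toroidal domains, 1 ghost plane) and a local stand-in for positive_modulo assumed to return the non-negative remainder:

// Standalone illustration (not part of XGC): ghost-plane ranges wrap around the torus.
#include <cstdio>

// Stand-in with the assumed behavior of positive_modulo() in globals.hpp (result in [0, m))
static int positive_modulo(int value, int m){ return ((value % m) + m) % m; }

int main(){
  const int nplanes = 8, n_phi_domains = 4, n_ghost_planes = 1;
  const int planes_per_domain = nplanes/n_phi_domains; // 2 owned planes per toroidal domain

  for(int i_phi=0; i_phi<n_phi_domains; i_phi++){
    int first_owned = planes_per_domain*i_phi;
    int first_plane = positive_modulo(first_owned - n_ghost_planes, nplanes);
    int last_plane  = (first_owned + planes_per_domain - 1 + n_ghost_planes)%nplanes;
    int n_planes    = positive_modulo(last_plane - first_plane, nplanes) + 1;
    // e.g. domain 0 owns planes 0-1 but holds 7,0,1,2 (4 planes) once ghosts are added
    printf("phi domain %d: owns [%d,%d], holds ring range [%d..%d] (%d planes)\n",
           i_phi, first_owned, first_owned + planes_per_domain - 1,
           first_plane, last_plane, n_planes);
  }
  return 0;
}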