XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
species.hpp
Go to the documentation of this file.
1 #ifndef SPECIES_HPP
2 #define SPECIES_HPP
3 #include <Cabana_AoSoA.hpp>
4 #include <Cabana_DeepCopy.hpp>
5 #include <Kokkos_Core.hpp>
6 #include "NamelistReader.hpp"
7 #include "timer_macro.hpp"
8 #include "magnetic_field.hpp"
9 #include "grid.hpp"
10 #include "domain_decomposition.hpp"
11 #include "particles.hpp"
12 #include "space_settings.hpp"
13 #include "distribution.hpp"
14 #include "profile.hpp"
15 #include "gyro_avg_mat.hpp"
17 #include "basic_physics.hpp"
18 #include "memory_prediction.hpp"
19 #include "xgc_io.hpp"
20 
21 extern "C" void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles* ptl);
22 extern "C" void set_min_max_num(int isp, int n_ptl);
23 extern "C" void adjust_n_ptl_for_core_ptl(int* n_ptl);
24 
29  return false;
30 }
31 
36  return false;
37 }
38 
39 // Used for Cabana slices (getting one particle property at a time)
40 namespace PtlSlice{
41 #ifdef ESC_PTL
42 enum{Ph=0,Ct,Gid,Flag};
43 #else
44 enum{Ph=0,Ct,Gid};
45 #endif
46 }
47 
48 struct PtlMvmt{
49  // Options for particle location management when looping over particles
50  enum SendOpt{
51  NoSend=0,
54  };
55 
56  enum ReturnOpt{
60  };
61 
64 
65  PtlMvmt(SendOpt send_opt, ReturnOpt return_opt) : send_opt(send_opt), return_opt(return_opt){}
66 };
67 
71 };
72 
73 // Species class
74 template<class Device>
75 class Species{
76  public:
77 
78  int idx;
79  bool is_electron;
80  bool is_adiabatic;
83  double mass;
84  double charge;
85  double charge_eu;
86  double c_m;
87  double c2_2m;
88 
92  bool dynamic_f0;
97 
98  bool is_deltaf;
99 
100  int ncycles;
102 
104  int n_ptl;
105  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> particles;
106 
107  // Device particles
108  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> particles_d;
109 
111 
114 
115  /*** Could be its own class inside species? ***/
119 
120  // phase0 (for ion restoration)
121  Cabana::AoSoA<PhaseDataTypes,HostType,VEC_LEN> phase0;
122  Cabana::AoSoA<PhaseDataTypes,Device,VEC_LEN> phase0_d;
123 
124  // For electron restoration
125  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> backup_particles;
126  int n_backup_particles; // Number of particles stored in backup_particles (can't be deduced from its size due to buffer)
127  /****/
128 
130 
132 
133  Eq::Profile<Device> eq_temp; // Equilibrium temperature
134  Eq::Profile<Device> eq_den; // Equilibrium density
135  Eq::Profile<Device> eq_flow; // Equilibrium flow
136  int eq_flow_type; // Type of Equilibrium flow
137 
138  Eq::Profile<Device> eq_fg_temp; // Equilibrium temperature
139  Eq::Profile<Device> eq_fg_den; // Equilibrium density - not used
140  Eq::Profile<Device> eq_fg_flow; // Equilibrium flow - not used for now
141  int eq_fg_flow_type; // Type of Equilibrium flow
142 
143  Eq::Profile<Device> eq_mk_temp; // Equilibrium temperature
144  Eq::Profile<Device> eq_mk_den; // Equilibrium density
145  Eq::Profile<Device> eq_mk_flow; // Equilibrium flow
146  int eq_mk_flow_type; // Type of Equilibrium flow
147 
148 
150 
151  /*** Constructors ***/
152 
153  Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, bool is_deltaf_in,
154  int ncycles_in);
155 
156  Species(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const MagneticField<DeviceType> &magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, int idx_in, int nonadiabatic_idx_in);
157 
158  // Electron or ion default constructor
159  Species(SpeciesType sp_type, int n_ptl)
160  : idx(sp_type==ELECTRON ? 0 : 1),
161  is_electron(sp_type==ELECTRON),
162  mass(is_electron ? 3.344e-30 : PROTON_MASS),
164  charge_eu(is_electron ? -1.0 : 1.0),
165  is_deltaf(true),
166  is_adiabatic(false),
167  nonadiabatic_idx(idx), // Since is_adiabatic is false above
169  ncycles(is_electron ? 70 : 1),
170  c_m(charge/mass),
171  c2_2m(0.5*charge*charge/mass),
173  n_ptl(n_ptl),
174  backup_particles("backup_particles", 0),
175  particles("particles", add_vec_buffer(n_ptl)),
178  eq_temp(1.0e3,-0.1),
179  eq_den(1.0e19,-0.1),
180  eq_flow_type(2),
181  eq_fg_temp(1.0e3,-0.1),
182  eq_fg_den(1.0e19,-0.1),
183  eq_fg_flow_type(2),
184  eq_mk_temp(1.0e3,-0.1),
185  eq_mk_den(1.0e19,-0.1),
186  eq_mk_flow_type(2),
187  owns_particles_d(false),
191 
192  // Special constructor for tests that involve tracking particles in memory
193  // The idea is to use these particles to test functions that reorder particles,
194  // e.g. sort, shift, and cleaning
195  Species(int n_ptl_in)
196  : n_ptl(n_ptl_in),
197  is_electron(true),
198  is_adiabatic(false),
199  particles("particles", add_vec_buffer(n_ptl_in)),
201  owns_particles_d(false),
206  {
207 
208  // Slice particle properties
209  auto ph = Cabana::slice<PtlSlice::Ph>(particles);
210  auto ct = Cabana::slice<PtlSlice::Ct>(particles);
211  auto gid = Cabana::slice<PtlSlice::Gid>(particles);
212 #ifdef ESC_PTL
213  auto flag = Cabana::slice<PtlSlice::Flag>(particles);
214 #endif
215 
216  // Offset gid if using MPI
217 #ifdef USE_MPI
218  long long int gid_offset = n_ptl*SML_COMM_RANK;
219 #else
220  long long int gid_offset = 0;
221 #endif
222 
223  // Assign trackable values
224  for (int i=0;i<n_ptl;i++){
225  // Set GID in order
226  gid(i) = gid_offset + i+1; // 1-indexed
227 
228  // Value of properties is gid + 0.1*(property index)
229  // First particle: (1.0, 1.1, ... 1.8)
230  // Second particle: (2.0, 2.1, ... 2.8)
231  for (int j=0;j<6;j++) ph(i, j) = gid(i) + (j)*0.1;
232  for (int j=0;j<3;j++) ct(i, j) = gid(i) + (j+6)*0.1;
233  }
234 
235  // Buffer particles: same but with gid = -1
236  if(n_ptl>0){
237  for (int i=n_ptl;i<add_vec_buffer(n_ptl);i++){
238  gid(i) = -1;
239  for (int j=0;j<6;j++) ph(i, j) = gid(i) + (j)*0.1;
240  for (int j=0;j<3;j++) ct(i, j) = gid(i) + (j+6)*0.1;
241  }
242  }
243  }
244 
245  static std::vector<MemoryPrediction> estimate_memory_usage(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx);
246 
247  static int get_initial_n_ptl(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int sml_special, int species_idx, bool verbose);
248 
249  void resize_particles(int new_n_ptl){
250  n_ptl = new_n_ptl;
251 
252 #ifndef USE_GPU
253  // If CPU-only, particles_d points to the same location as particles. If particles is resized, then Cabana will not deallocate the first allocation
254  // since it is still used by particles_d. So, reset particles_d before resize, and point it back to particles only afterwards
255  particles_d = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>();
256 #endif
257 
258  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
259  particles.resize(add_vec_buffer(n_ptl));
260 
261 #ifndef USE_GPU
262  // Point particles_d back to particles after resize
264 #endif
265 
267  }
268 
270 #ifdef USE_GPU
271  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
272  particles.resize(particles_d.size());
273 #else
274  // Point particles back to particles_d after resize
276 #endif
277 
279  }
280 
281  /* If using CPU-only, then "device" particles are a shallow copy of host particles so that
282  * there is no unnecessary duplication. When "device" particles are resized, Cabana will keep the
283  * original allocation if there is a second reference (i.e. host particles).
284  * To resolve this, we free the host particles here so that there is no second reference
285  * */
287  particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>();
288  }
289 
290  /* Resizes device particles if on GPU, or just creates a shallow copy if CPU only
291  * */
293  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
294 
295 #ifdef USE_GPU
296  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
297  // Resize device particles to match n particles
299 #else
300  // If kernels are on CPU, do shallow copy
302 #endif
303  }
304 
305  /* Resizes device particles
306  * */
307  void resize_device_particles(int new_n_ptl){
308  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
309 
310  n_ptl = new_n_ptl;
311 
312  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
313 
314  // Resize device particles to match host particles
316  }
317 
318  /* Copies particles to device - deep copy if using GPU, otherwise shallow copy
319  * Also takes the opportunity to set the buffer particles to realistic values
320  * */
322  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles to device, but doesn't own the device array.");
323 
324 #ifdef USE_GPU
325  // Copy to device
326  Cabana::deep_copy(particles_d, particles);
327 #else
328  // No operation required if CPU-only
329 #endif
330 
331  // Copy last particle to fill remainder of trailing vector in AoSoA
333  }
334 
335  /* Copies particles from device - deep copy if using GPU, otherwise no copy is necessary
336  * */
338  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles from device, but doesn't own the device array.");
339 
340 #ifdef USE_GPU
341  // Copy particles to host
342  Cabana::deep_copy(particles, particles_d);
343 #else
344  // No operation required if CPU-only
345 #endif
346  }
347 
348  /* Copies particles to device if they are resident on the device
349  * */
352  }
353 
354  /* Copies particles from device if they are resident on the device
355  * */
358  }
359 
360  /* Copies particles to device if they are NOT resident on the device
361  * */
364  }
365 
366  /* Copies particles from device if they are NOT resident on the device
367  * */
370  }
371 
380  if (n_ptl>0){
381  int last_ptl_index = n_ptl - 1;
382  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
383  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
384  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
385 #ifdef ESC_PTL
386  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
387 #endif
388 
389  Kokkos::parallel_for("set_buffer_particles_d", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
390  // Buffer particles: same as last particle, gid = -1
391  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
392  for (int j=0;j<3;j++) ct(i, j) = ct(last_ptl_index, j);
393  gid(i) = -1;
394 #ifdef ESC_PTL
395  flag(i) = flag(last_ptl_index);
396 #endif
397  });
398  }
399  }
400 
409  if (n_ptl>0){
410  int last_ptl_index = n_ptl - 1;
411  auto ph = Cabana::slice<PtlSlice::Ph>(phase0_d);
412 
413  Kokkos::parallel_for("set_buffer_phase0", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
414  // copy final real particle
415  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
416  });
417  }
418  }
419 
420  // Options for custom launch bounds since kokkos defaults are suboptimal for electron push kernel
421  enum class LaunchBounds{
422  Default,
423  Custom
424  };
425 
431  template<typename F>
432  inline void for_all_particles(const std::string label, F lambda_func) const {
433  Kokkos::RangePolicy<ExSpace> particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
434  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
435  }
436 
437  inline void back_up_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
438  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
439  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
440  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
441 #ifdef ESC_PTL
442  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
443 #endif
444 
445  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
446  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
447  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
448 #ifdef ESC_PTL
449  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
450 #endif
451 
452  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
453  int i_offset = i + offset;
454  // Make backup copy
455  for (int j=0;j<6;j++) ph_b(i, j) = ph(i_offset, j);
456  for (int j=0;j<3;j++) ct_b(i, j) = ct(i_offset, j);
457  gid_b(i) = gid(i_offset);
458 #ifdef ESC_PTL
459  flag_b(i) = flag(i_offset);
460 #endif
461  // Deactivate
462  gid(i_offset) = -1;
463  });
464  }
465 
466  inline void restore_backup_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
467  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
468  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
469  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
470 #ifdef ESC_PTL
471  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
472 #endif
473 
474  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
475  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
476  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
477 #ifdef ESC_PTL
478  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
479 #endif
480 
481  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
482  int i_offset = i + offset;
483  // Restore from backup copy
484  for (int j=0;j<6;j++) ph(i_offset, j) = ph_b(i, j);
485  for (int j=0;j<3;j++) ct(i_offset, j) = ct_b(i, j);
486  gid(i_offset) = gid_b(i);
487 #ifdef ESC_PTL
488  flag(i_offset) = flag_b(i);
489 #endif
490  });
491  }
492 
498  template<typename F>
499  inline void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const {
500  if(end_idx <= begin_idx) return; // Return if range is 0 or less
501 
502  // Still need the subset to line up with the AoSoA vector length
503  int first_soa = begin_idx/VEC_LEN;
504  int n_other_ptl_in_first_soa = begin_idx - first_soa*VEC_LEN;
505  bool first_soa_is_partial = (n_other_ptl_in_first_soa>0);
506  int last_soa = (end_idx-1)/VEC_LEN;
507  int n_other_ptl_in_last_soa = (last_soa+1)*VEC_LEN - end_idx;
508  bool last_soa_is_partial = (n_other_ptl_in_last_soa>0);
509 
510 #ifdef USE_GPU
511  int first_item_in_shifted_range = first_soa*VEC_LEN;
512 #else
513  int first_item_in_shifted_range = first_soa;
514 #endif
515 
516  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_first_soa;
517  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_last_soa;
518  if(first_soa_is_partial){
519  // Make a backup of the first SoA and set the particles_d GIDs to -1
520  ptl_first_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_first_soa", n_other_ptl_in_first_soa);
521  back_up_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
522  }
523  if(last_soa_is_partial){
524  // Make a backup of the last SoA and set the particles_d GIDs to -1
525  ptl_last_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_last_soa", n_other_ptl_in_last_soa);
526  back_up_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
527  }
528 
529  // Finally, do the parallel_for
530  Kokkos::RangePolicy<ExSpace> particle_range_policy( first_item_in_shifted_range, p_range<DeviceType>(end_idx) );
531  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
532 
533  // Restore from backup
534  if(first_soa_is_partial){
535  restore_backup_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
536  }
537  if(last_soa_is_partial){
538  restore_backup_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
539  }
540  }
541 
549  template<typename F>
550  inline void for_all_particles(const std::string label, F lambda_func,
551  const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default) {
552  if(!owns_particles_d) exit_XGC("\nSpecies tried to loop over particles on device, but doesn't own the device array.");
553 
554  bool use_streaming = stream_particles;
555 #ifndef USE_STREAMS
556  use_streaming = false; // Just to be safe, turn streams off here
557 #endif
558 
559  bool send_ptl = ( mvmt.send_opt==PtlMvmt::Send ||
561  bool return_ptl = ( mvmt.return_opt==PtlMvmt::Return ||
563 
564  // Don't need to stream if particles are already present and don't need to be returned
565  if((!send_ptl) && (!return_ptl)) use_streaming = false;
566 
567  if(use_streaming){
568 #ifdef USE_STREAMS
569  Streamed::Option stream_option = Streamed::Normal; // Send to device and back
570  if(!send_ptl) stream_option = Streamed::NoSend;
571  if(!return_ptl) stream_option = Streamed::NoReturn;
572 
573  // Execute streaming parallel_for
574  Streamed::parallel_for(label, n_ptl, lambda_func, stream_option, particles, particles_d);
575 #endif
576  }else{
577  if(send_ptl) TIMER("copy_ptl_to_device",copy_particles_to_device() );
578 
579  if (launch_bounds==LaunchBounds::Custom) {
580 #ifdef USE_EPUSH_LAUNCH_BOUNDS
581 # if !defined(PUSH_MAX_THREADS_PER_BLOCK) || !defined(PUSH_MIN_WARPS_PER_EU)
582 # error "USE_EPUSH_LAUNCH_BOUNDS requires PUSH_MAX_THREADS_PER_BLOCK and PUSH_MIN_WARPS_PER_EU to be defined"
583 # endif
584  Kokkos::RangePolicy<ExSpace, Kokkos::LaunchBounds<PUSH_MAX_THREADS_PER_BLOCK, PUSH_MIN_WARPS_PER_EU>>
585  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
586  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
587 #else
588  exit_XGC("\nERROR: LaunchBounds::Custom specified, but USE_EPUSH_LAUNCH_BOUNDS is not defined\n");
589 #endif
590  } else {
591  Kokkos::RangePolicy<ExSpace>
592  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
593  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
594  }
595 
596  if(return_ptl) TIMER("copy_ptl_from_device", copy_particles_from_device() );
597  }
598  }
599 
600  KOKKOS_INLINE_FUNCTION VecParticles* ptl() const{
601  return (VecParticles*)(&particles_d.access(0));
602  }
603 
604  KOKKOS_INLINE_FUNCTION VecPhase* ph0() const{
605  return (VecPhase*)(&phase0_d.access(0));
606  }
607 
609  for_all_particles("copy_to_phase0", KOKKOS_LAMBDA( const int idx ){
610  AoSoAIndices<DeviceType> inds(idx);
611  VecParticles* ptl_loc = species.ptl();
612  VecPhase* ph0_loc = species.ph0();
613  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
614  int p_vec = inds.a + i_simd;
615  ph0_loc[inds.s].r[p_vec] = ptl_loc[inds.s].ph.r[p_vec];
616  ph0_loc[inds.s].z[p_vec] = ptl_loc[inds.s].ph.z[p_vec];
617  ph0_loc[inds.s].phi[p_vec] = ptl_loc[inds.s].ph.phi[p_vec];
618  ph0_loc[inds.s].rho[p_vec] = ptl_loc[inds.s].ph.rho[p_vec];
619  ph0_loc[inds.s].w1[p_vec] = ptl_loc[inds.s].ph.w1[p_vec];
620  ph0_loc[inds.s].w2[p_vec] = ptl_loc[inds.s].ph.w2[p_vec];
621  }
622  });
623  }
624 
627  // If particles are resident on the device, then use the host particle allocation as the backup
628  // Otherwise, resize the backup_particle array
631  }else{
632  backup_particles.resize(particles_d.size());
633  }
634 
635  // Copy particle data to backup
636  Cabana::deep_copy(backup_particles, particles_d);
638  } else {
639  // For ions, save phase to phase0
640  phase0_d.resize(particles_d.size());
641  copy_to_phase0(*this); // Separate function to avoid implicit copy into lambda
642  }
644  }
645 
648  // If particles are resident on device, don't need to resize host particles since they are already used as the backup.
649  // If not, resize host particle array to fit backup particles
653  }else{
654  // If particles are not resident on device, resize host particle array to fit backup particles
656  }
657 
658  // Resize device particle array to fit backed up particles
660 
661  // Restore particle data from backup
662  Cabana::deep_copy(particles_d, backup_particles);
663 
665  // If the backup particles are simply pointing to the host particles, then
666  // point them to their own 0-sized allocation when finished using them
667  // so that they don't make a copy when host particles get resized
668  backup_particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>("backup_particles", 0);
669  }
670  } else {
671  // When ipc==2, copy phase0 to device
672  // First, resize device phase0 and reset pointer
673  phase0_d.resize(phase0.size());
674 
675  // Next copy data to phase0 on device
676  Cabana::deep_copy(phase0_d, phase0);
677 
678  // Fill buffer with realistic ptl data
680  }
681  particles_are_backed_up = false;
682  }
683 
684  KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices<Device>& inds, SimdParticles& part_one) const {
685  VecPhase* ph0_loc = ph0();
686  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
687  int p_vec = inds.a + i_simd;
688  part_one.ph.r[i_simd] = ph0_loc[inds.s].r[p_vec];
689  part_one.ph.z[i_simd] = ph0_loc[inds.s].z[p_vec];
690  part_one.ph.phi[i_simd] = ph0_loc[inds.s].phi[p_vec];
691  part_one.ph.rho[i_simd] = ph0_loc[inds.s].rho[p_vec];
692  part_one.ph.w1[i_simd] = ph0_loc[inds.s].w1[p_vec];
693  part_one.ph.w2[i_simd] = ph0_loc[inds.s].w2[p_vec];
694  }
695  }
696 
697  long long int get_total_n_ptl(){
698 #ifdef USE_MPI
699  long long int tmp_n_ptl = n_ptl;
700  long long int out_n_ptl = 0;
701  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_LONG_LONG_INT, MPI_SUM, SML_COMM_WORLD);
702  return out_n_ptl;
703 #else
704  return (long long int)(n_ptl);
705 #endif
706  }
707 
709 #ifdef USE_MPI
710  int tmp_n_ptl = n_ptl;
711  int out_n_ptl = 0;
712  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_INT, MPI_MAX, SML_COMM_WORLD);
713  return out_n_ptl;
714 #else
715  return n_ptl;
716 #endif
717  }
718 
719  // Gets the gyro_radius of a species based on equilibrium temperature
720  // inode is the LOCAL (poloidally decomposed) grid node index to get temperature
721  // smu_n is the normalized sqrt(mu)
722  // bfield is the magnetic field at inode
723  KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const{
724  // Should replace UNIT_CHARGE*charge_eu with charge(?)
725  return smu_n*sqrt(mass*f0.fg_temp_ev(inode)*EV_2_J) / (UNIT_CHARGE*charge_eu*bfield);
726  }
727 
728  // Gets the equilibrium thermal velocity of a species based on f0 temperature
729  // inode is the GLOBAL node index to get temperature
730  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const{
731  return thermal_velocity(mass, f0.temp_global(inode));
732  }
733 
734  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on device
735  // inode is the local node index to get temperature
736  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const{
737  return thermal_velocity(mass, f0.temp_ev(inode));
738  }
739 
740  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on host
741  // inode is the local node index to get temperature
742  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const{
743  return thermal_velocity(mass, f0.temp_ev_h(inode));
744  }
745 
746  KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const{
747  return thermal_velocity(mass, f0.fg_temp_ev_h(inode));
748  }
749 
750  // Get species velocity
751  KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid<DeviceType>& grid, const MagneticField<DeviceType>& magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, SimdParticles& part, Simd<double>& smu, Simd<double>& vp, Simd<int>& nearest_node, Simd<bool>& not_in_triangle, Simd<bool>& not_in_poloidal_domain) const{
752 
753  // This modulo surely doesn't need to be here (at least, should be elsewhere).
754  // Modulo phi coordinate
755  grid.wedge_modulo_phi(part.ph.phi);
756 
758  grid.get_grid_weights(magnetic_field, part.ph.v(), grid_wts0);
759 
760  // Output argument
761  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
762  not_in_triangle[i_simd] = !grid_wts0.is_valid(i_simd);
763  }
764 
765  Simd<double> bmag;
766  magnetic_field.bmag_interpol(part.ph.v(), bmag);
767 
768  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
769  if(!grid_wts0.is_valid(i_simd)) continue;
770 
771  nearest_node[i_simd]=grid_wts0.node[i_simd] - pol_decomp.node_offset;
772  not_in_poloidal_domain[i_simd] = (nearest_node[i_simd]<0 || nearest_node[i_simd]>=pol_decomp.nnodes);
773 
774  double temp_ev_norm = not_in_poloidal_domain[i_simd] ? f0.fg_temp_ev(0) : f0.fg_temp_ev(nearest_node[i_simd]);
775 
776  // get vp and smu
777  const double& B = bmag[i_simd];
778  vp[i_simd] = normalized_v_para(c_m, mass, B, temp_ev_norm, part.ph.rho[i_simd]);
779  smu[i_simd] = normalized_sqrt_mu(B, temp_ev_norm, part.ct.mu[i_simd]);
780  }
781  }
782 
784  const Grid<DeviceType>& grid,
786  const VelocityGrid& vgrid);
787 
790 
791  void write_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
792  void write_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
793  void read_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
794  void read_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name, bool n_ranks_is_same, int version);
795 
796  long long int get_max_gid() const;
797  void get_ptl_write_total_and_offsets(const DomainDecomposition<DeviceType>& pol_decomp, long long int& inum_total, long long int& ioff) const;
798 };
799 
800 #endif
void calculate_global_f0_arrays(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field)
Definition: species.cpp:493
Cabana::AoSoA< PhaseDataTypes, HostType, VEC_LEN > phase0
Definition: species.hpp:121
WeightEvoEq weight_evo_eq
Definition: species.hpp:91
bool stream_particles
Whether to stream particles between host and device if possible.
Definition: species.hpp:113
Definition: globals.hpp:84
KOKKOS_INLINE_FUNCTION VecPhase * ph0() const
Definition: species.hpp:604
KOKKOS_INLINE_FUNCTION int divide_and_round_up(int a, int b)
Definition: globals.hpp:203
bool owns_particles_d
Whether the species owns the device particle allocation right now.
Definition: species.hpp:110
void back_up_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:437
KOKKOS_INLINE_FUNCTION double normalized_v_para(double c_m, double mass, double B, double temp_ev, double rho)
Definition: basic_physics.hpp:80
subroutine adjust_n_ptl_for_core_ptl(n_ptl)
Definition: load.F90:54
void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles *ptl)
void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const
Definition: species.hpp:499
constexpr double PROTON_MASS
Definition: constants.hpp:7
MarkerType
Definition: globals.hpp:110
Distribution< Device > f0
Species distribution in velocity space on local mesh nodes.
Definition: species.hpp:129
MPI_Comm SML_COMM_WORLD
Definition: my_mpi.cpp:4
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > backup_particles
Copy of particles to be restored for RK2.
Definition: species.hpp:125
bool is_electron
Whether this species is the electrons.
Definition: species.hpp:79
Eq::Profile< Device > eq_mk_temp
Definition: species.hpp:143
void for_all_particles(const std::string label, F lambda_func, const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default)
Definition: species.hpp:550
bool dynamic_f0
Whether f0 can evolve in time.
Definition: species.hpp:96
void save_backup_particles()
Definition: species.hpp:625
void update_decomposed_f0_calculations(const DomainDecomposition< DeviceType > &pol_decomp, const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const VelocityGrid &vgrid)
Definition: species.cpp:438
double c2_2m
c2/2m
Definition: species.hpp:87
double rho[VEC_LEN]
Definition: particles.hpp:96
void copy_to_phase0(Species< Device > &species)
Definition: species.hpp:608
Definition: species.hpp:58
Simd< double > w1
Definition: particles.hpp:22
double c_m
c/m
Definition: species.hpp:86
Definition: species.hpp:57
constexpr double EV_2_J
Conversion rate ev to J.
Definition: constants.hpp:5
Definition: velocity_grid.hpp:8
bool default_streaming_option()
Definition: species.hpp:28
Eq::Profile< Device > eq_den
Definition: species.hpp:134
Definition: globals.hpp:89
KOKKOS_INLINE_FUNCTION VecParticles * ptl() const
Definition: species.hpp:600
Definition: grid_weights.hpp:47
KOKKOS_INLINE_FUNCTION double thermal_velocity(double mass, double temp_ev)
Definition: basic_physics.hpp:58
Definition: NamelistReader.hpp:193
KinType kintype
Whether the species is gyrokinetic or drift kinetic.
Definition: species.hpp:82
Eq::Profile< Device > eq_fg_temp
Definition: species.hpp:138
Definition: magnetic_field.hpp:12
void get_ptl_write_total_and_offsets(const DomainDecomposition< DeviceType > &pol_decomp, long long int &inum_total, long long int &ioff) const
Definition: species.cpp:578
int add_vec_buffer(int n_ptl)
Definition: particles.hpp:194
int idx
Index in all_species.
Definition: species.hpp:78
Definition: particles.hpp:92
int a
The index in the inner array of the AoSoA.
Definition: particles.hpp:150
KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const
Definition: species.hpp:746
Definition: particles.hpp:109
KOKKOS_INLINE_FUNCTION void bmag_interpol(const SimdVector &v, Simd< double > &bmag) const
Definition: magnetic_field.tpp:239
bool particles_are_backed_up
Whether particles are currently backed up.
Definition: species.hpp:118
int nonadiabatic_idx
Index of species skipping adiabatic species (for compatibility with Fortran arrays) ...
Definition: species.hpp:81
bool default_residence_option()
Definition: species.hpp:35
int n_ptl
Number of particles.
Definition: species.hpp:104
Eq::Profile< Device > eq_mk_flow
Definition: species.hpp:145
KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const DomainDecomposition< DeviceType > &pol_decomp, SimdParticles &part, Simd< double > &smu, Simd< double > &vp, Simd< int > &nearest_node, Simd< bool > &not_in_triangle, Simd< bool > &not_in_poloidal_domain) const
Definition: species.hpp:751
Definition: streamed_parallel_for.hpp:16
KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const
Definition: species.hpp:723
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:55
Definition: streamed_parallel_for.hpp:14
Eq::Profile< Device > eq_fg_flow
Definition: species.hpp:140
void set_buffer_phase0_d()
Definition: species.hpp:408
long long int get_total_n_ptl()
Definition: species.hpp:697
void set_buffer_particles_d()
Definition: species.hpp:379
Simd< double > rho
Definition: particles.hpp:21
Definition: species.hpp:59
int p_range< DeviceType >(int num_particle)
Definition: particles.hpp:187
int eq_flow_type
Definition: species.hpp:136
double charge_eu
Particle charge in eu.
Definition: species.hpp:85
Definition: species.hpp:51
int nnodes
Number of nodes belonging to this MPI rank.
Definition: domain_decomposition.hpp:56
void resize_particles(int new_n_ptl)
Definition: species.hpp:249
double mass
Particle mass.
Definition: species.hpp:83
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const
Definition: species.hpp:730
Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, bool is_deltaf_in, int ncycles_in)
Definition: species.cpp:25
void write_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:603
void for_all_particles(const std::string label, F lambda_func) const
Definition: species.hpp:432
Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > particles_d
Particles on device.
Definition: species.hpp:108
Definition: species.hpp:48
double w2[VEC_LEN]
Definition: particles.hpp:98
#define TIMER(N, F)
Definition: timer_macro.hpp:24
FAnalyticShape
Definition: globals.hpp:116
Eq::Profile< Device > eq_mk_den
Definition: species.hpp:144
RKRestorationMethod
Definition: species.hpp:68
void copy_particles_to_device_if_not_resident()
Definition: species.hpp:362
RKRestorationMethod RK_restoration_method
Currently, electrons must use the first method and ions must use the second.
Definition: species.hpp:116
Simd< double > r
Definition: particles.hpp:18
void resize_host_particles_to_match_device()
Definition: species.hpp:269
Definition: species.hpp:69
KOKKOS_INLINE_FUNCTION SimdVector & v()
Definition: particles.hpp:39
ReturnOpt return_opt
Definition: species.hpp:63
ReturnOpt
Definition: species.hpp:56
Option
Definition: streamed_parallel_for.hpp:13
void restore_particles_from_backup()
Definition: species.hpp:646
MarkerType marker_type
Marker type: reduced delta-f, total-f, full-f, or none (placeholder for adiabatic species) ...
Definition: species.hpp:89
Definition: globals.hpp:90
static std::vector< MemoryPrediction > estimate_memory_usage(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx)
Definition: species.cpp:56
SendOpt send_opt
Definition: species.hpp:62
double charge
Particle charge.
Definition: species.hpp:84
SimdPhase ph
Definition: particles.hpp:59
void copy_particles_from_device()
Definition: species.hpp:337
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const
Definition: species.hpp:736
void copy_particles_from_device_if_not_resident()
Definition: species.hpp:368
KOKKOS_INLINE_FUNCTION void wedge_modulo_phi(Simd< double > &phi_mod) const
Definition: grid.tpp:86
void unassign_host_particles()
Definition: species.hpp:286
void read_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name, bool n_ranks_is_same, int version)
Definition: species.cpp:653
int ncycles_between_sorts
Number of subcycles between sorts.
Definition: species.hpp:101
Definition: particles.hpp:58
Cabana::AoSoA< PhaseDataTypes, Device, VEC_LEN > phase0_d
Definition: species.hpp:122
int SML_COMM_RANK
Definition: my_mpi.cpp:5
KinType
Definition: globals.hpp:88
Species(SpeciesType sp_type, int n_ptl)
Definition: species.hpp:159
bool is_deltaf
Whether this species is deltaf.
Definition: species.hpp:98
Definition: xgc_io.hpp:24
VecPhase ph
Definition: particles.hpp:110
Definition: species.hpp:44
Definition: species.hpp:70
void set_min_max_num(int isp, int n_ptl)
int minimum_ptl_reservation
The minimum reservation size for particles.
Definition: species.hpp:103
int s
The index in the outer array of the AoSoA.
Definition: particles.hpp:149
KOKKOS_INLINE_FUNCTION double normalized_sqrt_mu(double B, double temp_ev, double mu)
Definition: basic_physics.hpp:90
Simd< double > z
Definition: particles.hpp:19
void copy_particles_to_device_if_resident()
Definition: species.hpp:350
Definition: species.hpp:52
void resize_device_particles(int new_n_ptl)
Definition: species.hpp:307
Definition: species.hpp:53
constexpr double UNIT_CHARGE
Charge of an electron (C)
Definition: constants.hpp:4
long long int get_max_gid() const
Definition: species.cpp:555
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const
Definition: species.hpp:742
void exit_XGC(std::string msg)
Definition: globals.hpp:37
void copy_particles_from_device_if_resident()
Definition: species.hpp:356
bool is_adiabatic
Whether this species is adiabatic.
Definition: species.hpp:80
Simd< double > phi
Definition: particles.hpp:20
FAnalyticShape f_analytic_shape
Shape of the analytic f: Maxwellian, SlowingDown, or None
Definition: species.hpp:90
Definition: magnetic_field.F90:1
static constexpr const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t Async
Definition: space_settings.hpp:82
int n_backup_particles
Definition: species.hpp:126
Eq::Profile< Device > eq_flow
Definition: species.hpp:135
Definition: streamed_parallel_for.hpp:15
SendOpt
Definition: species.hpp:50
void read_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:748
SimdConstants ct
Definition: particles.hpp:60
int eq_fg_flow_type
Definition: species.hpp:141
void write_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:712
void copy_particles_to_device()
Definition: species.hpp:321
KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices< Device > &inds, SimdParticles &part_one) const
Definition: species.hpp:684
Species(int n_ptl_in)
Definition: species.hpp:195
double phi[VEC_LEN]
Definition: particles.hpp:95
Simd< double > w2
Definition: particles.hpp:23
double r[VEC_LEN]
Definition: particles.hpp:93
GyroAverageMatrices< HostType > gyro_avg_matrices
Definition: species.hpp:149
Definition: species.hpp:75
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
KOKKOS_INLINE_FUNCTION void get_grid_weights(const MagneticField< Device > &magnetic_field, const SimdVector &v, const Simd< double > &psi, SimdVector2D &xff, SimdGridWeights< Order::One, PIT > &grid_wts) const
Definition: grid.tpp:32
Definition: species.hpp:44
Eq::Profile< Device > eq_temp
Definition: species.hpp:133
bool particles_resident_on_device
Whether the particles can reside on device.
Definition: species.hpp:112
Simd< double > mu
Definition: particles.hpp:52
Eq::Profile< Device > eq_fg_den
Definition: species.hpp:139
PtlMvmt(SendOpt send_opt, ReturnOpt return_opt)
Definition: species.hpp:65
static int get_initial_n_ptl(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int sml_special, int species_idx, bool verbose)
Definition: species.cpp:113
int ncycles
Number of subcycles.
Definition: species.hpp:100
int eq_mk_flow_type
Definition: species.hpp:146
WeightEvoEq
Definition: globals.hpp:121
Definition: profile.hpp:171
int collision_grid_index
Which collision grid to use.
Definition: species.hpp:131
Definition: particles.hpp:148
SpeciesType
Definition: globals.hpp:83
double z[VEC_LEN]
Definition: particles.hpp:94
void restore_backup_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:466
Definition: distribution.hpp:10
void resize_device_particles()
Definition: species.hpp:292
int get_max_n_ptl()
Definition: species.hpp:708
LaunchBounds
Definition: species.hpp:421
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > particles
Particles.
Definition: species.hpp:105
double w1[VEC_LEN]
Definition: particles.hpp:97
Definition: species.hpp:44