XGC1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
species.hpp
Go to the documentation of this file.
1 #ifndef SPECIES_HPP
2 #define SPECIES_HPP
3 #include <Cabana_AoSoA.hpp>
4 #include <Cabana_DeepCopy.hpp>
5 #include <Kokkos_Core.hpp>
6 #include "NamelistReader.hpp"
7 #include "timer_macro.hpp"
8 #include "magnetic_field.hpp"
9 #include "grid.hpp"
10 #include "domain_decomposition.hpp"
11 #include "particles.hpp"
12 #include "space_settings.hpp"
13 #include "distribution.hpp"
14 #include "profile.hpp"
15 #include "gyro_avg_mat.hpp"
17 #include "basic_physics.hpp"
18 #include "memory_prediction.hpp"
19 #include "xgc_io.hpp"
20 
21 extern "C" void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles* ptl);
22 extern "C" void set_min_max_num(int isp, int n_ptl);
23 extern "C" void adjust_n_ptl_for_core_ptl(int* n_ptl);
24 
29  return false;
30 }
31 
36  return false;
37 }
38 
39 // Used for Cabana slices (getting one particle property at a time)
40 namespace PtlSlice{
41 #ifdef ESC_PTL
42 enum{Ph=0,Ct,Gid,Flag};
43 #else
44 enum{Ph=0,Ct,Gid};
45 #endif
46 }
47 
48 struct PtlMvmt{
49  // Options for particle location management when looping over particles
50  enum SendOpt{
51  NoSend=0,
54  };
55 
56  enum ReturnOpt{
60  };
61 
64 
65  PtlMvmt(SendOpt send_opt, ReturnOpt return_opt) : send_opt(send_opt), return_opt(return_opt){}
66 };
67 
71 };
72 
73 // Species class
74 template<class Device>
75 class Species{
76  public:
77 
78  int idx;
79  bool is_electron;
80  bool is_adiabatic;
83  double mass;
84  double charge;
85  double charge_eu;
86  double c_m;
87  double c2_2m;
88 
92  bool dynamic_f0;
98 
99  int ncycles;
101 
103  int n_ptl;
104  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> particles;
105 
106  // Device particles
107  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> particles_d;
108 
110 
113 
114  /*** Could be its own class inside species? ***/
118 
119  // phase0 (for ion restoration)
120  Cabana::AoSoA<PhaseDataTypes,HostType,VEC_LEN> phase0;
121  Cabana::AoSoA<PhaseDataTypes,Device,VEC_LEN> phase0_d;
122 
123  // For electron restoration
124  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> backup_particles;
125  int n_backup_particles; // Number of particles stored in backup_particles (can't be deduced from its size due to buffer)
126  /****/
127 
129 
131 
132  Eq::Profile<Device> eq_temp; // Equilibrium temperature
133  Eq::Profile<Device> eq_den; // Equilibrium density
134  Eq::Profile<Device> eq_flow; // Equilibrium flow
135  int eq_flow_type; // Type of Equilibirum flow
136 
137  Eq::Profile<Device> eq_fg_temp; // Equilibrium temperature
138  Eq::Profile<Device> eq_fg_flow; // Equilibrium flow - not used for now
139  int eq_fg_flow_type; // Type of Equilibirum flow
140 
141  Eq::Profile<Device> eq_mk_temp; // Equilibrium temperature
142  Eq::Profile<Device> eq_mk_den; // Equilibrium density
143  Eq::Profile<Device> eq_mk_flow; // Equilibrium flow
144  int eq_mk_flow_type; // Type of Equilibirum flow
145 
146 
148 
149  /*** Constructors ***/
150 
151  Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in,
152  int ncycles_in);
153 
154  Species(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const MagneticField<DeviceType> &magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, int idx_in, int nonadiabatic_idx_in);
155 
156  // Electron or ion default constructor
157  Species(SpeciesType sp_type, int n_ptl)
158  : idx(sp_type==ELECTRON ? 0 : 1),
159  is_electron(sp_type==ELECTRON),
160  mass(is_electron ? 3.344e-30 : PROTON_MASS),
162  charge_eu(is_electron ? -1.0 : 1.0),
163  is_adiabatic(false),
164  nonadiabatic_idx(idx), // Since is_adiabatic is false above
166  ncycles(is_electron ? 70 : 1),
167  c_m(charge/mass),
168  c2_2m(0.5*charge*charge/mass),
170  n_ptl(n_ptl),
171  backup_particles("backup_particles", 0),
172  particles("particles", add_vec_buffer(n_ptl)),
175  eq_temp(1.0e3,-0.1),
176  eq_den(1.0e19,-0.1),
177  eq_flow_type(2),
178  eq_fg_temp(1.0e3,-0.1),
179  eq_fg_flow_type(2),
180  eq_mk_temp(1.0e3,-0.1),
181  eq_mk_den(1.0e19,-0.1),
182  eq_mk_flow_type(2),
183  owns_particles_d(false),
187 
188  // Special constructor for tests that involve tracking particles in memory
189  // The idea is to use these particles to test functions that reorder particles,
190  // e.g. sort, shift, and cleaning
191  Species(int n_ptl_in)
192  : n_ptl(n_ptl_in),
193  is_electron(true),
194  is_adiabatic(false),
195  particles("particles", add_vec_buffer(n_ptl_in)),
197  owns_particles_d(false),
202  {
203 
204  // Slice particle properties
205  auto ph = Cabana::slice<PtlSlice::Ph>(particles);
206  auto ct = Cabana::slice<PtlSlice::Ct>(particles);
207  auto gid = Cabana::slice<PtlSlice::Gid>(particles);
208 #ifdef ESC_PTL
209  auto flag = Cabana::slice<PtlSlice::Flag>(particles);
210 #endif
211 
212  // Offset gid if using MPI
213 #ifdef USE_MPI
214  long long int gid_offset = n_ptl*SML_COMM_RANK;
215 #else
216  long long int gid_offset = 0;
217 #endif
218 
219  // Assign trackable values
220  for (int i=0;i<n_ptl;i++){
221  // Set GID in order
222  gid(i) = gid_offset + i+1; // 1-indexed
223 
224  // Value of properties is gid + 0.1*(property index)
225  // First particle: (1.0, 1.1, ... 1.8)
226  // Second particle: (2.0, 2.1, ... 2.8)
227  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = gid(i) + (j)*0.1;
228  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = gid(i) + (j+PTL_NPHASE)*0.1;
229  }
230 
231  // Buffer particles: same but with gid = -1
232  if(n_ptl>0){
233  for (int i=n_ptl;i<add_vec_buffer(n_ptl);i++){
234  gid(i) = -1;
235  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = gid(i) + (j)*0.1;
236  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = gid(i) + (j+PTL_NPHASE)*0.1;
237  }
238  }
239  }
240 
241  static std::vector<MemoryPrediction> estimate_memory_usage(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx);
242 
243  static int get_initial_n_ptl(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx, bool verbose);
244 
245  void resize_particles(int new_n_ptl){
246  n_ptl = new_n_ptl;
247 
248 #ifndef USE_GPU
249  // If CPU-only, particles_d points to the same location as particles. If particles is resized, then Cabana will not deallocate the first allocation
250  // since it is still used by particles_d. So, reset particles_d before resize, and point it back to particles only afterwards
251  particles_d = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>();
252 #endif
253 
254  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
255  particles.resize(add_vec_buffer(n_ptl));
256 
257 #ifndef USE_GPU
258  // Point particles_d back to particles after resize
260 #endif
261 
263  }
264 
266 #ifdef USE_GPU
267  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
268  particles.resize(particles_d.size());
269 #else
270  // Point particles back to particles_d after resize
272 #endif
273 
275  }
276 
277  /* If using CPU-only, then "device" particles are a shallow copy of host particles so that
278  * there is no unnecessary duplication. When "device" particles are resized, Cabana will keep the
279  * original allocation if there is a second reference (i.e. host particles).
280  * To resolve this, we free the host particles here so that there is no second reference
281  * */
283  particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>();
284  }
285 
286  /* Resizes device particles if on GPU, or just creates a shallow copy if CPU only
287  * */
289  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
290 
291 #ifdef USE_GPU
292  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
293  // Resize device particles to match n particles
295 #else
296  // If kernels are on CPU, do shallow copy
298 #endif
299  }
300 
301  /* Resizes device particles
302  * */
303  void resize_device_particles(int new_n_ptl){
304  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
305 
306  n_ptl = new_n_ptl;
307 
308  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
309 
310  // Resize device particles to match host particles
312  }
313 
314  /* Copies particles to device - deep copy if using GPU, otherwise shallow copy
315  * Also takes the opportunity to set the buffer particles to realistic values
316  * */
318  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles to device, but doesn't own the device array.");
319 
320 #ifdef USE_GPU
321  // Copy to device
322  Cabana::deep_copy(particles_d, particles);
323 #else
324  // No operation required if CPU-only
325 #endif
326 
327  // Copy last particle to fill remainder of trailing vector in AoSoA
329  }
330 
331  /* Copies particles from device - deep copy if using GPU, otherwise no copy is necessary
332  * */
334  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles from device, but doesn't own the device array.");
335 
336 #ifdef USE_GPU
337  // Copy particles to host
338  Cabana::deep_copy(particles, particles_d);
339 #else
340  // No operation required if CPU-only
341 #endif
342  }
343 
344  /* Copies particles to device if they are resident on the device
345  * */
348  }
349 
350  /* Copies particles from device if they are resident on the device
351  * */
354  }
355 
356  /* Copies particles to device if they are NOT resident on the device
357  * */
360  }
361 
362  /* Copies particles from device if they are NOT resident on the device
363  * */
366  }
367 
376  if (n_ptl>0){
377  int last_ptl_index = n_ptl - 1;
378  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
379  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
380  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
381 #ifdef ESC_PTL
382  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
383 #endif
384 
385  Kokkos::parallel_for("set_buffer_particles_d", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
386  // Buffer particles: same as last particle, gid = -1
387  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = ph(last_ptl_index, j);
388  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = ct(last_ptl_index, j);
389  gid(i) = -1;
390 #ifdef ESC_PTL
391  flag(i) = flag(last_ptl_index);
392 #endif
393  });
394  }
395  }
396 
405  if (n_ptl>0){
406  int last_ptl_index = n_ptl - 1;
407  auto ph = Cabana::slice<PtlSlice::Ph>(phase0_d);
408 
409  Kokkos::parallel_for("set_buffer_phase0", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
410  // copy final real particle
411  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
412  });
413  }
414  }
415 
416  // Options for custom launch bounds since kokkos defaults are suboptimal for electron push kernel
417  enum class LaunchBounds{
418  Default,
419  Custom
420  };
421 
427  template<typename F>
428  inline void for_all_particles(const std::string label, F lambda_func) const {
429  Kokkos::RangePolicy<ExSpace> particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
430  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
431  }
432 
433  inline void back_up_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
434  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
435  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
436  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
437 #ifdef ESC_PTL
438  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
439 #endif
440 
441  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
442  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
443  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
444 #ifdef ESC_PTL
445  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
446 #endif
447 
448  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
449  int i_offset = i + offset;
450  // Make backup copy
451  for (int j=0;j<PTL_NPHASE;j++) ph_b(i, j) = ph(i_offset, j);
452  for (int j=0;j<PTL_NCONST;j++) ct_b(i, j) = ct(i_offset, j);
453  gid_b(i) = gid(i_offset);
454 #ifdef ESC_PTL
455  flag_b(i) = flag(i_offset);
456 #endif
457  // Deactivate
458  gid(i_offset) = -1;
459  });
460  }
461 
462  inline void restore_backup_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
463  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
464  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
465  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
466 #ifdef ESC_PTL
467  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
468 #endif
469 
470  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
471  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
472  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
473 #ifdef ESC_PTL
474  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
475 #endif
476 
477  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
478  int i_offset = i + offset;
479  // Restore from backup copy
480  for (int j=0;j<PTL_NPHASE;j++) ph(i_offset, j) = ph_b(i, j);
481  for (int j=0;j<PTL_NCONST;j++) ct(i_offset, j) = ct_b(i, j);
482  gid(i_offset) = gid_b(i);
483 #ifdef ESC_PTL
484  flag(i_offset) = flag_b(i);
485 #endif
486  });
487  }
488 
494  template<typename F>
495  inline void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const {
496  if(end_idx <= begin_idx) return; // Return if range is 0 or less
497 
498  // Still need the subset to line up with the AoSoA vector length
499  int first_soa = begin_idx/VEC_LEN;
500  int n_other_ptl_in_first_soa = begin_idx - first_soa*VEC_LEN;
501  bool first_soa_is_partial = (n_other_ptl_in_first_soa>0);
502  int last_soa = (end_idx-1)/VEC_LEN;
503  int n_other_ptl_in_last_soa = (last_soa+1)*VEC_LEN - end_idx;
504  bool last_soa_is_partial = (n_other_ptl_in_last_soa>0);
505 
506 #ifdef USE_GPU
507  int first_item_in_shifted_range = first_soa*VEC_LEN;
508 #else
509  int first_item_in_shifted_range = first_soa;
510 #endif
511 
512  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_first_soa;
513  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_last_soa;
514  if(first_soa_is_partial){
515  // Make a backup of the first SoA and set the particles_d GIDs to -1
516  ptl_first_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_first_soa", n_other_ptl_in_first_soa);
517  back_up_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
518  }
519  if(last_soa_is_partial){
520  // Make a backup of the last SoA and set the particles_d GIDs to -1
521  ptl_last_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_last_soa", n_other_ptl_in_last_soa);
522  back_up_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
523  }
524 
525  // Finally, do the parallel_for
526  Kokkos::RangePolicy<ExSpace> particle_range_policy( first_item_in_shifted_range, p_range<DeviceType>(end_idx) );
527  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
528 
529  // Restore from backup
530  if(first_soa_is_partial){
531  restore_backup_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
532  }
533  if(last_soa_is_partial){
534  restore_backup_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
535  }
536  }
537 
545  template<typename F>
546  inline void for_all_particles(const std::string label, F lambda_func,
547  const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default) {
548  if(!owns_particles_d) exit_XGC("\nSpecies tried to loop over particles on device, but doesn't own the device array.");
549 
550  bool use_streaming = stream_particles;
551 #ifndef USE_STREAMS
552  use_streaming = false; // Just to be safe, turn streams off here
553 #endif
554 
555  bool send_ptl = ( mvmt.send_opt==PtlMvmt::Send ||
557  bool return_ptl = ( mvmt.return_opt==PtlMvmt::Return ||
559 
560  // Don't need to stream if particles are already present and don't need to be returned
561  if((!send_ptl) && (!return_ptl)) use_streaming = false;
562 
563  if(use_streaming){
564 #ifdef USE_STREAMS
565  Streamed::Option stream_option = Streamed::Normal; // Send to device and back
566  if(!send_ptl) stream_option = Streamed::NoSend;
567  if(!return_ptl) stream_option = Streamed::NoReturn;
568 
569  // Execute streaming parallel_for
570  Streamed::parallel_for(label, n_ptl, lambda_func, stream_option, particles, particles_d);
571 #endif
572  }else{
573  if(send_ptl) TIMER("copy_ptl_to_device",copy_particles_to_device() );
574 
575  if (launch_bounds==LaunchBounds::Custom) {
576 #ifdef USE_EPUSH_LAUNCH_BOUNDS
577 # if !defined(PUSH_MAX_THREADS_PER_BLOCK) || !defined(PUSH_MIN_WARPS_PER_EU)
578 # error "USE_EPUSH_LAUNCH_BOUNDS requires PUSH_MAX_THREADS_PER_BLOCK and PUSH_MIN_WARPS_PER_EU to be defined"
579 # endif
580  Kokkos::RangePolicy<ExSpace, Kokkos::LaunchBounds<PUSH_MAX_THREADS_PER_BLOCK, PUSH_MIN_WARPS_PER_EU>>
581  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
582  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
583 #else
584  exit_XGC("\nERROR: LaunchBounds::Custom specified, but USE_EPUSH_LAUNCH_BOUNDS is not defined\n");
585 #endif
586  } else {
587  Kokkos::RangePolicy<ExSpace>
588  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
589  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
590  }
591 
592  if(return_ptl) TIMER("copy_ptl_from_device", copy_particles_from_device() );
593  }
594  }
595 
596  KOKKOS_INLINE_FUNCTION VecParticles* ptl() const{
597  return (VecParticles*)(&particles_d.access(0));
598  }
599 
600  KOKKOS_INLINE_FUNCTION VecPhase* ph0() const{
601  return (VecPhase*)(&phase0_d.access(0));
602  }
603 
605  for_all_particles("copy_to_phase0", KOKKOS_LAMBDA( const int idx ){
606  AoSoAIndices<DeviceType> inds(idx);
607  VecParticles* ptl_loc = species.ptl();
608  VecPhase* ph0_loc = species.ph0();
609  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
610  int p_vec = inds.a + i_simd;
611  ph0_loc[inds.s].r[p_vec] = ptl_loc[inds.s].ph.r[p_vec];
612  ph0_loc[inds.s].z[p_vec] = ptl_loc[inds.s].ph.z[p_vec];
613  ph0_loc[inds.s].phi[p_vec] = ptl_loc[inds.s].ph.phi[p_vec];
614  ph0_loc[inds.s].rho[p_vec] = ptl_loc[inds.s].ph.rho[p_vec];
615  ph0_loc[inds.s].w1[p_vec] = ptl_loc[inds.s].ph.w1[p_vec];
616  ph0_loc[inds.s].w2[p_vec] = ptl_loc[inds.s].ph.w2[p_vec];
617  }
618  });
619  }
620 
623  // If particles are resident on the device, then use the host particle allocation as the backup
624  // Otherwise, resize the backup_particle array
627  }else{
628  backup_particles.resize(particles_d.size());
629  }
630 
631  // Copy particle data to backup
632  Cabana::deep_copy(backup_particles, particles_d);
634  } else {
635  // For ions, save phase to phase0
636  phase0_d.resize(particles_d.size());
637  copy_to_phase0(*this); // Separate function to avoid implicit copy into lambda
638  }
640  }
641 
644  // If particles are resident on device, don't need to resize host particles since they are already used as the backup.
645  // If not, resize host particle array to fit backup particles
649  }else{
650  // If particles are not resident on device, resize host particle array to fit backup particles
652  }
653 
654  // Resize device particle array to fit backed up particles
656 
657  // Restore particle data from backup
658  Cabana::deep_copy(particles_d, backup_particles);
659 
661  // If the backup particles are simply pointing to the host particles, then
662  // point them to their own 0-sized allocation when finished using them
663  // so that they don't make a copy when host particles get resized
664  backup_particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>("backup_particles", 0);
665  }
666  } else {
667  // When ipc==2, copy phase0 to device
668  // First, resize device phase0 and reset pointer
669  phase0_d.resize(phase0.size());
670 
671  // Next copy data to phase0 on device
672  Cabana::deep_copy(phase0_d, phase0);
673 
674  // Fill buffer with realistic ptl data
676  }
677  particles_are_backed_up = false;
678  }
679 
680  KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices<Device>& inds, SimdParticles& part_one) const {
681  VecPhase* ph0_loc = ph0();
682  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
683  int p_vec = inds.a + i_simd;
684  part_one.ph.r[i_simd] = ph0_loc[inds.s].r[p_vec];
685  part_one.ph.z[i_simd] = ph0_loc[inds.s].z[p_vec];
686  part_one.ph.phi[i_simd] = ph0_loc[inds.s].phi[p_vec];
687  part_one.ph.rho[i_simd] = ph0_loc[inds.s].rho[p_vec];
688  part_one.ph.w1[i_simd] = ph0_loc[inds.s].w1[p_vec];
689  part_one.ph.w2[i_simd] = ph0_loc[inds.s].w2[p_vec];
690  }
691  }
692 
693  long long int get_total_n_ptl(){
694 #ifdef USE_MPI
695  long long int tmp_n_ptl = n_ptl;
696  long long int out_n_ptl = 0;
697  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_LONG_LONG_INT, MPI_SUM, SML_COMM_WORLD);
698  return out_n_ptl;
699 #else
700  return (long long int)(n_ptl);
701 #endif
702  }
703 
705 #ifdef USE_MPI
706  int tmp_n_ptl = n_ptl;
707  int out_n_ptl = 0;
708  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_INT, MPI_MAX, SML_COMM_WORLD);
709  return out_n_ptl;
710 #else
711  return n_ptl;
712 #endif
713  }
714 
715  // Gets the gyro_radius of a species based on equilibrium temperature
716  // inode is the LOCAL (poloidally decomposed) grid node index to get temperature
717  // smu_n is the normalized sqrt(mu)
718  // bfield is the magnetic field at inode
719  KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const{
720  // Should replace UNIT_CHARGE*charge_eu with charge(?)
721  return smu_n*sqrt(mass*f0.fg_temp_ev(inode)*EV_2_J) / (UNIT_CHARGE*charge_eu*bfield);
722  }
723 
724  // Gets the equilibrium thermal velocity of a species based on f0 temperature
725  // inode is the GLOBAL node index to get temperature
726  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const{
727  return thermal_velocity(mass, f0.temp_global(inode));
728  }
729 
730  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on device
731  // inode is the local node index to get temperature
732  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const{
733  return thermal_velocity(mass, f0.temp_ev(inode));
734  }
735 
736  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on host
737  // inode is the local node index to get temperature
738  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const{
739  return thermal_velocity(mass, f0.temp_ev_h(inode));
740  }
741 
742  KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const{
743  return thermal_velocity(mass, f0.fg_temp_ev_h(inode));
744  }
745 
746  // Get species velocity
747  KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid<DeviceType>& grid, const MagneticField<DeviceType>& magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, SimdParticles& part, Simd<double>& smu, Simd<double>& vp, Simd<int>& nearest_node, Simd<bool>& not_in_triangle, Simd<bool>& not_in_poloidal_domain) const{
748 
749  // This modulo surely doesnt need to be here (at least, should be elsewhere).
750  // Modulo phi coordinate
751  grid.wedge_modulo_phi(part.ph.phi);
752 
754  grid.get_grid_weights(magnetic_field, part.ph.v(), grid_wts0);
755 
756  // Output argument
757  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
758  not_in_triangle[i_simd] = !grid_wts0.is_valid(i_simd);
759  }
760 
761  Simd<double> bmag;
762  magnetic_field.bmag_interpol(part.ph.v(), bmag);
763 
764  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
765  if(!grid_wts0.is_valid(i_simd)) continue;
766 
767  nearest_node[i_simd]=grid_wts0.node[i_simd] - pol_decomp.node_offset;
768  not_in_poloidal_domain[i_simd] = (nearest_node[i_simd]<0 || nearest_node[i_simd]>=pol_decomp.nnodes);
769 
770  double temp_ev_norm = not_in_poloidal_domain[i_simd] ? f0.fg_temp_ev(0) : f0.fg_temp_ev(nearest_node[i_simd]);
771 
772  // get vp and smu
773  const double& B = bmag[i_simd];
774  vp[i_simd] = normalized_v_para(c_m, mass, B, temp_ev_norm, part.ph.rho[i_simd]);
775  smu[i_simd] = normalized_sqrt_mu(B, temp_ev_norm, part.ct.mu[i_simd]);
776  }
777  }
778 
779  // get flow in m/s according to eq_flow_type
780  KOKKOS_INLINE_FUNCTION double eq_flow_ms(const MagneticField<DeviceType> &magnetic_field, double psi_in,double r,double z, double bphi_over_b) const {
781 
782  double flow = eq_flow.value(magnetic_field,psi_in,r,z);
783  flow=(eq_flow_type>=1)? flow*r : flow;
784  flow=(eq_flow_type>=2)? flow*bphi_over_b : flow ;
785 
786  return flow;
787  }
788 
790  const Grid<DeviceType>& grid,
792  const VelocityGrid& vgrid);
793 
796 
797  void write_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
798  void write_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
799  void read_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
800  void read_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name, bool n_ranks_is_same, int version);
802 
803  long long int get_max_gid() const;
804  void get_ptl_write_total_and_offsets(const DomainDecomposition<DeviceType>& pol_decomp, long long int& inum_total, long long int& ioff) const;
805 };
806 
807 #endif
Cabana::AoSoA< PhaseDataTypes, HostType, VEC_LEN > phase0
Definition: species.hpp:120
WeightEvoEq weight_evo_eq
Definition: species.hpp:91
bool stream_particles
Whether to stream particles between host and device if possible.
Definition: species.hpp:112
bool maxwellian_init
whether initial distribution is maxwellian
Definition: species.hpp:97
Definition: globals.hpp:84
KOKKOS_INLINE_FUNCTION VecPhase * ph0() const
Definition: species.hpp:600
KOKKOS_INLINE_FUNCTION int divide_and_round_up(int a, int b)
Definition: globals.hpp:206
bool owns_particles_d
Whether the species owns the device particle allocation right now.
Definition: species.hpp:109
void back_up_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:433
KOKKOS_INLINE_FUNCTION double normalized_v_para(double c_m, double mass, double B, double temp_ev, double rho)
Definition: basic_physics.hpp:96
subroutine adjust_n_ptl_for_core_ptl(n_ptl)
Definition: load.F90:54
void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles *ptl)
void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const
Definition: species.hpp:495
constexpr double PROTON_MASS
Definition: constants.hpp:7
MarkerType
Definition: globals.hpp:110
Distribution< Device > f0
Species distribution in velocity space on local mesh nodes.
Definition: species.hpp:128
MPI_Comm SML_COMM_WORLD
Definition: my_mpi.cpp:4
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > backup_particles
Copy of particles to be restored for RK2.
Definition: species.hpp:124
bool is_electron
Whether this species is the electrons.
Definition: species.hpp:79
Eq::Profile< Device > eq_mk_temp
Definition: species.hpp:141
static int get_initial_n_ptl(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx, bool verbose)
Definition: species.cpp:96
void for_all_particles(const std::string label, F lambda_func, const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default)
Definition: species.hpp:546
bool dynamic_f0
Whether f0 can evolve in time.
Definition: species.hpp:96
Definition: gyro_avg_mat.hpp:18
void save_backup_particles()
Definition: species.hpp:621
void update_decomposed_f0_calculations(const DomainDecomposition< DeviceType > &pol_decomp, const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const VelocityGrid &vgrid)
Definition: species.cpp:434
double c2_2m
c2/2m
Definition: species.hpp:87
double rho[VEC_LEN]
Definition: particles.hpp:99
void copy_to_phase0(Species< Device > &species)
Definition: species.hpp:604
Definition: species.hpp:58
Simd< double > w1
delta-f weight
Definition: particles.hpp:22
double c_m
c/m
Definition: species.hpp:86
Definition: species.hpp:57
constexpr double EV_2_J
Conversion rate ev to J.
Definition: constants.hpp:5
Definition: velocity_grid.hpp:8
bool default_streaming_option()
Definition: species.hpp:28
Eq::Profile< Device > eq_den
Definition: species.hpp:133
Definition: globals.hpp:89
KOKKOS_INLINE_FUNCTION VecParticles * ptl() const
Definition: species.hpp:596
Definition: grid_weights.hpp:47
KOKKOS_INLINE_FUNCTION double thermal_velocity(double mass, double temp_ev)
Definition: basic_physics.hpp:74
Definition: NamelistReader.hpp:193
KinType kintype
Whether the species is gyrokinetic or drift kinetic.
Definition: species.hpp:82
Eq::Profile< Device > eq_fg_temp
Definition: species.hpp:137
Definition: magnetic_field.hpp:12
void get_ptl_write_total_and_offsets(const DomainDecomposition< DeviceType > &pol_decomp, long long int &inum_total, long long int &ioff) const
Definition: species.cpp:596
int add_vec_buffer(int n_ptl)
Definition: particles.hpp:200
int idx
Index in all_species.
Definition: species.hpp:78
Definition: particles.hpp:95
int a
The index in the inner array of the AoSoA.
Definition: particles.hpp:156
KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const
Definition: species.hpp:742
Definition: globals.hpp:192
Definition: particles.hpp:115
KOKKOS_INLINE_FUNCTION void bmag_interpol(const SimdVector &v, Simd< double > &bmag) const
Definition: magnetic_field.tpp:313
bool particles_are_backed_up
Whether particles are currently backed up.
Definition: species.hpp:117
int nonadiabatic_idx
Index of species skipping adiabatic species (for compatibility with fortran arrays) ...
Definition: species.hpp:81
bool default_residence_option()
Definition: species.hpp:35
int n_ptl
Number of particles.
Definition: species.hpp:103
Eq::Profile< Device > eq_mk_flow
Definition: species.hpp:143
KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const DomainDecomposition< DeviceType > &pol_decomp, SimdParticles &part, Simd< double > &smu, Simd< double > &vp, Simd< int > &nearest_node, Simd< bool > &not_in_triangle, Simd< bool > &not_in_poloidal_domain) const
Definition: species.hpp:747
Definition: streamed_parallel_for.hpp:16
KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const
Definition: species.hpp:719
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:59
Definition: streamed_parallel_for.hpp:14
Eq::Profile< Device > eq_fg_flow
Definition: species.hpp:138
void set_buffer_phase0_d()
Definition: species.hpp:404
long long int get_total_n_ptl()
Definition: species.hpp:693
void set_buffer_particles_d()
Definition: species.hpp:375
Simd< double > rho
m*v_para/(q*B) - A_para^h/B (should it be plus or minus?)
Definition: particles.hpp:21
Definition: species.hpp:59
int p_range< DeviceType >(int num_particle)
Definition: particles.hpp:193
int eq_flow_type
Definition: species.hpp:135
double charge_eu
Particle charge in eu.
Definition: species.hpp:85
Definition: species.hpp:51
int nnodes
Number of nodes belonging to this MPI rank.
Definition: domain_decomposition.hpp:60
void resize_particles(int new_n_ptl)
Definition: species.hpp:245
double mass
Particle mass.
Definition: species.hpp:83
void initialize_global_f0_arrays(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field)
Definition: species.cpp:500
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const
Definition: species.hpp:726
void write_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:621
void for_all_particles(const std::string label, F lambda_func) const
Definition: species.hpp:428
Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > particles_d
Particles on device.
Definition: species.hpp:107
Definition: species.hpp:48
double w2[VEC_LEN]
Definition: particles.hpp:101
#define TIMER(N, F)
Definition: timer_macro.hpp:24
FAnalyticShape
Definition: globals.hpp:116
Eq::Profile< Device > eq_mk_den
Definition: species.hpp:142
RKRestorationMethod
Definition: species.hpp:68
void copy_particles_to_device_if_not_resident()
Definition: species.hpp:358
RKRestorationMethod RK_restoration_method
Currently, electrons must use first method and ions must use second.
Definition: species.hpp:115
Simd< double > r
Cylindrical coordinate R (major radial direction)
Definition: particles.hpp:18
void resize_host_particles_to_match_device()
Definition: species.hpp:265
Definition: species.hpp:69
KOKKOS_INLINE_FUNCTION SimdVector & v()
Definition: particles.hpp:39
ReturnOpt return_opt
Definition: species.hpp:63
Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, int ncycles_in)
Definition: species.cpp:11
ReturnOpt
Definition: species.hpp:56
Option
Definition: streamed_parallel_for.hpp:13
void restore_particles_from_backup()
Definition: species.hpp:642
void read_initial_distribution(NLReader::NamelistReader &nlr, const DomainDecomposition< DeviceType > &pol_decomp)
Definition: species.cpp:803
Definition: globals.hpp:202
MarkerType marker_type
Marker type: reduced delta-f, total-f, full-f, or none (placeholder for adiabatic species) ...
Definition: species.hpp:89
Definition: globals.hpp:90
static std::vector< MemoryPrediction > estimate_memory_usage(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx)
Definition: species.cpp:41
SendOpt send_opt
Definition: species.hpp:62
double charge
Particle charge.
Definition: species.hpp:84
SimdPhase ph
Definition: particles.hpp:62
void copy_particles_from_device()
Definition: species.hpp:333
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const
Definition: species.hpp:732
void copy_particles_from_device_if_not_resident()
Definition: species.hpp:364
KOKKOS_INLINE_FUNCTION void wedge_modulo_phi(Simd< double > &phi_mod) const
Definition: grid.tpp:86
void unassign_host_particles()
Definition: species.hpp:282
void read_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name, bool n_ranks_is_same, int version)
Definition: species.cpp:671
int ncycles_between_sorts
Number of subcycles between sorts.
Definition: species.hpp:100
Definition: particles.hpp:61
Cabana::AoSoA< PhaseDataTypes, Device, VEC_LEN > phase0_d
Definition: species.hpp:121
int SML_COMM_RANK
Definition: my_mpi.cpp:5
KinType
Definition: globals.hpp:88
Species(SpeciesType sp_type, int n_ptl)
Definition: species.hpp:157
Definition: xgc_io.hpp:24
VecPhase ph
Definition: particles.hpp:116
Definition: species.hpp:44
Definition: species.hpp:70
void set_min_max_num(int isp, int n_ptl)
int minimum_ptl_reservation
The minimum reservation size for particles.
Definition: species.hpp:102
int s
The index in the outer array of the AoSoA.
Definition: particles.hpp:155
KOKKOS_INLINE_FUNCTION double normalized_sqrt_mu(double B, double temp_ev, double mu)
Definition: basic_physics.hpp:106
Simd< double > z
Cylindrical coordinate Z.
Definition: particles.hpp:19
void copy_particles_to_device_if_resident()
Definition: species.hpp:346
Definition: species.hpp:52
void resize_device_particles(int new_n_ptl)
Definition: species.hpp:303
Definition: species.hpp:53
constexpr double UNIT_CHARGE
Charge of an electron (C)
Definition: constants.hpp:4
long long int get_max_gid() const
Definition: species.cpp:573
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const
Definition: species.hpp:738
void exit_XGC(std::string msg)
Definition: globals.hpp:37
void copy_particles_from_device_if_resident()
Definition: species.hpp:352
bool is_adiabatic
Whether this species is adiabatic.
Definition: species.hpp:80
Simd< double > phi
Cylindrical coordinate phi (toroidal direction)
Definition: particles.hpp:20
FAnalyticShape f_analytic_shape
f_analytic_shape shape: Maxwellian, SlowingDown or None
Definition: species.hpp:90
Definition: magnetic_field.F90:1
static constexpr const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t Async
Definition: space_settings.hpp:83
int n_backup_particles
Definition: species.hpp:125
Eq::Profile< Device > eq_flow
Definition: species.hpp:134
Definition: streamed_parallel_for.hpp:15
SendOpt
Definition: species.hpp:50
void read_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:766
SimdConstants ct
Definition: particles.hpp:63
int eq_fg_flow_type
Definition: species.hpp:139
void write_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:730
void copy_particles_to_device()
Definition: species.hpp:317
KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices< Device > &inds, SimdParticles &part_one) const
Definition: species.hpp:680
Species(int n_ptl_in)
Definition: species.hpp:191
double phi[VEC_LEN]
Definition: particles.hpp:98
Simd< double > w2
(1 - background distribution)/f0
Definition: particles.hpp:23
double r[VEC_LEN]
Definition: particles.hpp:96
Definition: species.hpp:75
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
KOKKOS_INLINE_FUNCTION void get_grid_weights(const MagneticField< Device > &magnetic_field, const SimdVector &v, const Simd< double > &psi, SimdVector2D &xff, SimdGridWeights< Order::One, PIT > &grid_wts) const
Definition: grid.tpp:32
GyroAverageMatrices< Device > gyro_avg_matrices
Definition: species.hpp:147
Definition: species.hpp:44
Eq::Profile< Device > eq_temp
Definition: species.hpp:132
bool particles_resident_on_device
Whether the particles can reside on device.
Definition: species.hpp:111
Simd< double > mu
m*v_perp^2/(2B)
Definition: particles.hpp:52
PtlMvmt(SendOpt send_opt, ReturnOpt return_opt)
Definition: species.hpp:65
int ncycles
Number of subcycles.
Definition: species.hpp:99
int eq_mk_flow_type
Definition: species.hpp:144
WeightEvoEq
Definition: globals.hpp:121
Definition: profile.hpp:171
int collision_grid_index
Which collision grid to use.
Definition: species.hpp:130
Definition: particles.hpp:154
SpeciesType
Definition: globals.hpp:83
double z[VEC_LEN]
Definition: particles.hpp:97
void restore_backup_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:462
Definition: distribution.hpp:11
void resize_device_particles()
Definition: species.hpp:288
int get_max_n_ptl()
Definition: species.hpp:704
LaunchBounds
Definition: species.hpp:417
KOKKOS_INLINE_FUNCTION double eq_flow_ms(const MagneticField< DeviceType > &magnetic_field, double psi_in, double r, double z, double bphi_over_b) const
Definition: species.hpp:780
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > particles
Particles.
Definition: species.hpp:104
double w1[VEC_LEN]
Definition: particles.hpp:100
Definition: species.hpp:44