XGC1
species.hpp
Go to the documentation of this file.
1 #ifndef SPECIES_HPP
2 #define SPECIES_HPP
3 #include "NamelistReader.hpp"
4 #include "timer_macro.hpp"
5 #include "magnetic_field.hpp"
6 #include "grid.hpp"
8 #include "particles.hpp"
9 #include "space_settings.hpp"
10 #include "distribution.hpp"
11 #include "profile.hpp"
12 #include "gyro_avg_mat.hpp"
14 #include "basic_physics.hpp"
15 #include "memory_prediction.hpp"
16 #include "xgc_io.hpp"
17 
18 extern "C" void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles* ptl);
19 extern "C" void set_min_max_num(int isp, int n_ptl);
20 extern "C" void adjust_n_ptl_for_core_ptl(int* n_ptl);
21 
26  return false;
27 }
28 
33  return false;
34 }
35 
36 struct PtlMvmt{
37  // Options for particle location management when looping over particles
38  enum SendOpt{
39  NoSend=0,
42  };
43 
44  enum ReturnOpt{
48  };
49 
52 
54 };
55 
59 };
60 
61 // Species class
62 template<class Device>
63 class Species{
64  public:
65 
66  int idx;
67  bool is_electron;
68  bool is_adiabatic;
71  double mass;
72  double charge;
73  double charge_eu;
74  double c_m;
75  double c2_2m;
76 
84  bool dynamic_f0;
86 
87  int ncycles;
89 
91  int n_ptl;
93 
94  // Device particles
96 
98 
101 
102  /*** Could be its own class inside species? ***/
106 
107  // phase0 (for ion restoration)
110 
111  // For electron restoration
114  int n_backup_particles; // Number of particles stored in backup_particles (can't be deduced from its size due to buffer)
115  bool backup_particles_on_device; // Whether to store back-up particles in device memory
116  /****/
117 
118  View<int*,CLayout,Device> tr_save; // Last known triangle position
119 
121 
123 
124  Eq::Profile<Device> eq_temp; // Equilibrium temperature
125  Eq::Profile<Device> eq_den; // Equilibrium density
126  Eq::Profile<Device> eq_flow; // Equilibrium flow
127  int eq_flow_type; // Type of Equilibirum flow
128 
129  Eq::Profile<Device> eq_fg_temp; // Equilibrium temperature
130  Eq::Profile<Device> eq_fg_flow; // Equilibrium flow - not used for now
131  int eq_fg_flow_type; // Type of Equilibirum flow
132 
133  Eq::Profile<Device> eq_mk_temp; // Equilibrium temperature
134  Eq::Profile<Device> eq_mk_den; // Equilibrium density
135  Eq::Profile<Device> eq_mk_flow; // Equilibrium flow
136  int eq_mk_flow_type; // Type of Equilibirum flow
137 
138 
140 
141  /*** Constructors ***/
142 
143  Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in,
144  int ncycles_in);
145 
146  Species(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const MagneticField<DeviceType> &magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, int idx_in, int nonadiabatic_idx_in);
147 
148  // Electron or ion default constructor
149  Species(SpeciesType sp_type, int n_ptl)
150  : idx(sp_type==ELECTRON ? 0 : 1),
151  is_electron(sp_type==ELECTRON),
152  mass(is_electron ? 3.344e-30 : PROTON_MASS),
154  charge_eu(is_electron ? -1.0 : 1.0),
156  nonadiabatic_idx(idx), // Since is_adiabatic is false above
158  ncycles(is_electron ? 70 : 1),
159  c_m(charge/mass),
160  c2_2m(0.5*charge*charge/mass),
162  n_ptl(n_ptl),
163  backup_particles("backup_particles", 0),
164  backup_particles_d("backup_particles_d", 0),
166  particles("particles", add_vec_buffer(n_ptl)),
169  eq_temp(1.0e3,-0.1),
170  eq_den(1.0e19,-0.1),
171  eq_flow_type(2),
172  eq_fg_temp(1.0e3,-0.1),
173  eq_fg_flow_type(2),
174  eq_mk_temp(1.0e3,-0.1),
175  eq_mk_den(1.0e19,-0.1),
176  eq_mk_flow_type(2),
181 
182  // Special constructor for tests that involve tracking particles in memory
183  // The idea is to use these particles to test functions that reorder particles,
184  // e.g. sort, shift, and cleaning
185  Species(int n_ptl_in)
186  : n_ptl(n_ptl_in),
187  is_electron(true),
189  particles("particles", add_vec_buffer(n_ptl_in)),
190  tr_save("tr_save", 0),
197  {
198 
199  // Slice particle properties
200  auto ph = Particles::slice<PtlSlice::Ph>(particles);
201  auto ct = Particles::slice<PtlSlice::Ct>(particles);
202  auto gid = Particles::slice<PtlSlice::Gid>(particles);
203 #ifdef ESC_PTL
204  auto flag = Particles::slice<PtlSlice::Flag>(particles);
205 #endif
206 
207  // Offset gid if using MPI
208 #ifdef USE_MPI
209  long long int gid_offset = n_ptl*SML_COMM_RANK;
210 #else
211  long long int gid_offset = 0;
212 #endif
213 
214  // Assign trackable values
215  for (int i=0;i<n_ptl;i++){
216  // Set GID in order
217  gid(i) = gid_offset + i+1; // 1-indexed
218 
219  // Value of properties is gid + 0.1*(property index)
220  // First particle: (1.0, 1.1, ... 1.8)
221  // Second particle: (2.0, 2.1, ... 2.8)
222  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = gid(i) + (j)*0.1;
223  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = gid(i) + (j+PTL_NPHASE)*0.1;
224  }
225 
226  // Buffer particles: same but with gid = -1
227  if(n_ptl>0){
228  for (int i=n_ptl;i<add_vec_buffer(n_ptl);i++){
229  gid(i) = -1;
230  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = gid(i) + (j)*0.1;
231  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = gid(i) + (j+PTL_NPHASE)*0.1;
232  }
233  }
234  }
235 
236  static std::vector<MemoryPrediction> estimate_memory_usage(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx);
237 
238  static int get_initial_n_ptl(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx, bool verbose);
239 
240  void resize_particles(int new_n_ptl){
241  n_ptl = new_n_ptl;
242 
243 #ifndef USE_GPU
244  // If CPU-only, particles_d points to the same location as particles. If particles is resized, then Cabana will not deallocate the first allocation
245  // since it is still used by particles_d. So, reset particles_d before resize, and point it back to particles only afterwards
247 #endif
248 
249  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
250  particles.resize(add_vec_buffer(n_ptl));
251 
252 #ifndef USE_GPU
253  // Point particles_d back to particles after resize
255 #endif
256 
258  }
259 
261 #ifdef USE_GPU
262  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
263  particles.resize(particles_d.size());
264 #else
265  // Point particles back to particles_d after resize
267 #endif
268 
270  }
271 
272  /* If using CPU-only, then "device" particles are a shallow copy of host particles so that
273  * there is no unnecessary duplication. When "device" particles are resized, Cabana will keep the
274  * original allocation if there is a second reference (i.e. host particles).
275  * To resolve this, we free the host particles here so that there is no second reference
276  * */
279  }
280 
281  /* Resizes device particles if on GPU, or just creates a shallow copy if CPU only
282  * */
284  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
285 
286 #ifdef USE_GPU
287  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
288  // Resize device particles to match n particles
290 #else
291  // If kernels are on CPU, do shallow copy
293 #endif
294  }
295 
296  /* Resizes device particles
297  * */
298  void resize_device_particles(int new_n_ptl){
299  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
300 
301  n_ptl = new_n_ptl;
302 
303  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
304 
305  // Resize device particles to match host particles
307  }
308 
309  /* Copies particles to device - deep copy if using GPU, otherwise shallow copy
310  * Also takes the opportunity to set the buffer particles to realistic values
311  * */
313  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles to device, but doesn't own the device array.");
314 
315 #ifdef USE_GPU
316  // Copy to device
318 #else
319  // No operation required if CPU-only
320 #endif
321 
322  // Copy last particle to fill remainder of trailing vector in AoSoA
324  }
325 
326  /* Copies particles from device - deep copy if using GPU, otherwise no copy is necessary
327  * */
329  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles from device, but doesn't own the device array.");
330 
331 #ifdef USE_GPU
332  // Copy particles to host
334 #else
335  // No operation required if CPU-only
336 #endif
337  }
338 
339  /* Copies particles to device if they are resident on the device
340  * */
343  }
344 
345  /* Copies particles from device if they are resident on the device
346  * */
349  }
350 
351  /* Copies particles to device if they are NOT resident on the device
352  * */
355  }
356 
357  /* Copies particles from device if they are NOT resident on the device
358  * */
361  }
362 
371  if (n_ptl>0){
372  int last_ptl_index = n_ptl - 1;
373  auto ph = Particles::slice<PtlSlice::Ph>(particles_d);
374  auto ct = Particles::slice<PtlSlice::Ct>(particles_d);
375  auto gid = Particles::slice<PtlSlice::Gid>(particles_d);
376 #ifdef ESC_PTL
377  auto flag = Particles::slice<PtlSlice::Flag>(particles_d);
378 #endif
379 
380  Kokkos::parallel_for("set_buffer_particles_d", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
381  // Buffer particles: same as last particle, gid = -1
382  for (int j=0;j<PTL_NPHASE;j++) ph(i, j) = ph(last_ptl_index, j);
383  for (int j=0;j<PTL_NCONST;j++) ct(i, j) = ct(last_ptl_index, j);
384  gid(i) = -1;
385 #ifdef ESC_PTL
386  flag(i) = flag(last_ptl_index);
387 #endif
388  });
389  }
390  }
391 
400  if (n_ptl>0){
401  int last_ptl_index = n_ptl - 1;
402  auto ph = Particles::slice<PtlSlice::Ph>(phase0_d);
403 
404  Kokkos::parallel_for("set_buffer_phase0", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
405  // copy final real particle
406  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
407  });
408  }
409  }
410 
411  // Options for custom launch bounds since kokkos defaults are suboptimal for electron push kernel
412  enum class LaunchBounds{
413  Default,
414  Custom
415  };
416 
422  template<typename F>
423  inline void for_all_particles(const std::string label, F lambda_func) const {
424  Kokkos::RangePolicy<ExSpace> particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
425  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
426  }
427 
428  inline void back_up_SoA(Particles::Array<ParticleDataTypes,Device>& backup_SoA, int offset, int n) const{
429  auto ph_b = Particles::slice<PtlSlice::Ph>(backup_SoA);
430  auto ct_b = Particles::slice<PtlSlice::Ct>(backup_SoA);
431  auto gid_b = Particles::slice<PtlSlice::Gid>(backup_SoA);
432 #ifdef ESC_PTL
433  auto flag_b = Particles::slice<PtlSlice::Flag>(backup_SoA);
434 #endif
435 
436  auto ph = Particles::slice<PtlSlice::Ph>(particles_d);
437  auto ct = Particles::slice<PtlSlice::Ct>(particles_d);
438  auto gid = Particles::slice<PtlSlice::Gid>(particles_d);
439 #ifdef ESC_PTL
440  auto flag = Particles::slice<PtlSlice::Flag>(particles_d);
441 #endif
442 
443  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
444  int i_offset = i + offset;
445  // Make backup copy
446  for (int j=0;j<PTL_NPHASE;j++) ph_b(i, j) = ph(i_offset, j);
447  for (int j=0;j<PTL_NCONST;j++) ct_b(i, j) = ct(i_offset, j);
448  gid_b(i) = gid(i_offset);
449 #ifdef ESC_PTL
450  flag_b(i) = flag(i_offset);
451 #endif
452  // Deactivate
453  gid(i_offset) = -1;
454  });
455  }
456 
457  inline void restore_backup_SoA(Particles::Array<ParticleDataTypes,Device>& backup_SoA, int offset, int n) const{
458  auto ph_b = Particles::slice<PtlSlice::Ph>(backup_SoA);
459  auto ct_b = Particles::slice<PtlSlice::Ct>(backup_SoA);
460  auto gid_b = Particles::slice<PtlSlice::Gid>(backup_SoA);
461 #ifdef ESC_PTL
462  auto flag_b = Particles::slice<PtlSlice::Flag>(backup_SoA);
463 #endif
464 
465  auto ph = Particles::slice<PtlSlice::Ph>(particles_d);
466  auto ct = Particles::slice<PtlSlice::Ct>(particles_d);
467  auto gid = Particles::slice<PtlSlice::Gid>(particles_d);
468 #ifdef ESC_PTL
469  auto flag = Particles::slice<PtlSlice::Flag>(particles_d);
470 #endif
471 
472  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
473  int i_offset = i + offset;
474  // Restore from backup copy
475  for (int j=0;j<PTL_NPHASE;j++) ph(i_offset, j) = ph_b(i, j);
476  for (int j=0;j<PTL_NCONST;j++) ct(i_offset, j) = ct_b(i, j);
477  gid(i_offset) = gid_b(i);
478 #ifdef ESC_PTL
479  flag(i_offset) = flag_b(i);
480 #endif
481  });
482  }
483 
489  template<typename F>
490  inline void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const {
491  if(end_idx <= begin_idx) return; // Return if range is 0 or less
492 
493  // Still need the subset to line up with the AoSoA vector length
494  int first_soa = begin_idx/VEC_LEN;
495  int n_other_ptl_in_first_soa = begin_idx - first_soa*VEC_LEN;
496  bool first_soa_is_partial = (n_other_ptl_in_first_soa>0);
497  int last_soa = (end_idx-1)/VEC_LEN;
498  int n_other_ptl_in_last_soa = (last_soa+1)*VEC_LEN - end_idx;
499  bool last_soa_is_partial = (n_other_ptl_in_last_soa>0);
500 
501 #ifdef USE_GPU
502  int first_item_in_shifted_range = first_soa*VEC_LEN;
503 #else
504  int first_item_in_shifted_range = first_soa;
505 #endif
506 
509  if(first_soa_is_partial){
510  // Make a backup of the first SoA and set the particles_d GIDs to -1
511  ptl_first_soa = Particles::Array<ParticleDataTypes,Device>("ptl_first_soa", n_other_ptl_in_first_soa);
512  back_up_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
513  }
514  if(last_soa_is_partial){
515  // Make a backup of the last SoA and set the particles_d GIDs to -1
516  ptl_last_soa = Particles::Array<ParticleDataTypes,Device>("ptl_last_soa", n_other_ptl_in_last_soa);
517  back_up_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
518  }
519 
520  // Finally, do the parallel_for
521  Kokkos::RangePolicy<ExSpace> particle_range_policy( first_item_in_shifted_range, p_range<DeviceType>(end_idx) );
522  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
523 
524  // Restore from backup
525  if(first_soa_is_partial){
526  restore_backup_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
527  }
528  if(last_soa_is_partial){
529  restore_backup_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
530  }
531  }
532 
540  template<typename F>
541  inline void for_all_particles(const std::string label, F lambda_func,
542  const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default) {
543  if(!owns_particles_d) exit_XGC("\nSpecies tried to loop over particles on device, but doesn't own the device array.");
544 
545  bool use_streaming = stream_particles;
546 #ifndef USE_STREAMS
547  use_streaming = false; // Just to be safe, turn streams off here
548 #endif
549 
550  bool send_ptl = ( mvmt.send_opt==PtlMvmt::Send ||
552  bool return_ptl = ( mvmt.return_opt==PtlMvmt::Return ||
554 
555  // Don't need to stream if particles are already present and don't need to be returned
556  if((!send_ptl) && (!return_ptl)) use_streaming = false;
557 
558  if(use_streaming){
559 #ifdef USE_STREAMS
560  Streamed::Option stream_option = Streamed::Normal; // Send to device and back
561  if(!send_ptl) stream_option = Streamed::NoSend;
562  if(!return_ptl) stream_option = Streamed::NoReturn;
563 
564  // Execute streaming parallel_for
565  Streamed::parallel_for(label, n_ptl, lambda_func, stream_option, particles, particles_d);
566 #endif
567  }else{
568  if(send_ptl) TIMER("copy_ptl_to_device",copy_particles_to_device() );
569 
570  if (launch_bounds==LaunchBounds::Custom) {
571 #ifdef USE_EPUSH_LAUNCH_BOUNDS
572 # if !defined(PUSH_MAX_THREADS_PER_BLOCK) || !defined(PUSH_MIN_WARPS_PER_EU)
573 # error "USE_EPUSH_LAUNCH_BOUNDS requires PUSH_MAX_THREADS_PER_BLOCK and PUSH_MIN_WARPS_PER_EU to be defined"
574 # endif
575  Kokkos::RangePolicy<ExSpace, Kokkos::LaunchBounds<PUSH_MAX_THREADS_PER_BLOCK, PUSH_MIN_WARPS_PER_EU>>
576  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
577  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
578 #else
579  exit_XGC("\nERROR: LaunchBounds::Custom specified, but USE_EPUSH_LAUNCH_BOUNDS is not defined\n");
580 #endif
581  } else {
582  Kokkos::RangePolicy<ExSpace>
583  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
584  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
585  }
586 
587  if(return_ptl) TIMER("copy_ptl_from_device", copy_particles_from_device() );
588  }
589  }
590 
591  KOKKOS_INLINE_FUNCTION VecParticles* ptl() const{
592  return (VecParticles*)(&particles_d.access(0));
593  }
594 
595  KOKKOS_INLINE_FUNCTION VecPhase* ph0() const{
596  return (VecPhase*)(&phase0_d.access(0));
597  }
598 
600  for_all_particles("copy_to_phase0", KOKKOS_LAMBDA( const int idx ){
602  VecParticles* ptl_loc = species.ptl();
603  VecPhase* ph0_loc = species.ph0();
604  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
605  int p_vec = inds.a + i_simd;
606  ph0_loc[inds.s].r[p_vec] = ptl_loc[inds.s].ph.r[p_vec];
607  ph0_loc[inds.s].z[p_vec] = ptl_loc[inds.s].ph.z[p_vec];
608  ph0_loc[inds.s].phi[p_vec] = ptl_loc[inds.s].ph.phi[p_vec];
609  ph0_loc[inds.s].rho[p_vec] = ptl_loc[inds.s].ph.rho[p_vec];
610  ph0_loc[inds.s].w1[p_vec] = ptl_loc[inds.s].ph.w1[p_vec];
611  ph0_loc[inds.s].w2[p_vec] = ptl_loc[inds.s].ph.w2[p_vec];
612  }
613  });
614  }
615 
616  bool phase0_is_stored() const{
618  }
619 
622  phase0_d.resize(phase0.size());
624  }
625  }
626 
630  backup_particles_d.resize(particles_d.size());
632  }else{
633  // If particles are resident on the device, then use the host particle allocation as the backup
634  // Otherwise, resize the backup_particle array
637  }else{
638  backup_particles.resize(particles_d.size());
639  }
640 
641  // Copy particle data to backup
643  }
645  } else {
646  // For ions, save phase to phase0
647  phase0_d.resize(particles_d.size());
648  copy_to_phase0(*this); // Separate function to avoid implicit copy into lambda
649  }
651  }
652 
655  // If particles are resident on device, don't need to resize host particles since they are already used as the backup.
656  // If not, resize host particle array to fit backup particles
660  }else{
661  // If particles are not resident on device, resize host particle array to fit backup particles
663  }
664 
665  // Resize device particle array to fit backed up particles
667 
670  }else{
671  // Restore particle data from backup
673 
675  // If the backup particles are simply pointing to the host particles, then
676  // point them to their own 0-sized allocation when finished using them
677  // so that they don't make a copy when host particles get resized
679  }
680  }
681  } else {
683 
684  // Fill buffer with realistic ptl data
686  }
687  particles_are_backed_up = false;
688  }
689 
690  // If phase0 is needed (i.e. for ions), copy phase0 back to host and deallocate device copy
693  phase0.resize(phase0_d.size());
695  phase0_d.resize(0);
696  }
697  }
698 
699  // Deallocate phase0
702  phase0.resize(0);
703  }
704  phase0_d.resize(0);
705  }
706 
707  KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices<Device>& inds, SimdParticles& part_one) const {
708  VecPhase* ph0_loc = ph0();
709  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
710  int p_vec = inds.a + i_simd;
711  part_one.ph.r[i_simd] = ph0_loc[inds.s].r[p_vec];
712  part_one.ph.z[i_simd] = ph0_loc[inds.s].z[p_vec];
713  part_one.ph.phi[i_simd] = ph0_loc[inds.s].phi[p_vec];
714  part_one.ph.rho[i_simd] = ph0_loc[inds.s].rho[p_vec];
715  part_one.ph.w1[i_simd] = ph0_loc[inds.s].w1[p_vec];
716  part_one.ph.w2[i_simd] = ph0_loc[inds.s].w2[p_vec];
717  }
718  }
719 
720  long long int get_total_n_ptl(){
721 #ifdef USE_MPI
722  long long int tmp_n_ptl = n_ptl;
723  long long int out_n_ptl = 0;
724  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_LONG_LONG_INT, MPI_SUM, SML_COMM_WORLD);
725  return out_n_ptl;
726 #else
727  return (long long int)(n_ptl);
728 #endif
729  }
730 
732 #ifdef USE_MPI
733  int tmp_n_ptl = n_ptl;
734  int out_n_ptl = 0;
735  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_INT, MPI_MAX, SML_COMM_WORLD);
736  return out_n_ptl;
737 #else
738  return n_ptl;
739 #endif
740  }
741 
742  // Gets the gyro_radius of a species based on equilibrium temperature
743  // inode is the LOCAL (poloidally decomposed) grid node index to get temperature
744  // smu_n is the normalized sqrt(mu)
745  // bfield is the magnetic field at inode
746  KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const{
747  // Should replace UNIT_CHARGE*charge_eu with charge(?)
748  return smu_n*sqrt(mass*f0.fg_temp_ev(inode)*EV_2_J) / (UNIT_CHARGE*charge_eu*bfield);
749  }
750 
751  // Gets the equilibrium thermal velocity of a species based on f0 temperature
752  // inode is the GLOBAL node index to get temperature
753  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const{
754  return thermal_velocity(mass, f0.temp_global(inode));
755  }
756 
757  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on device
758  // inode is the local node index to get temperature
759  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const{
760  return thermal_velocity(mass, f0.temp_ev(inode));
761  }
762 
763  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on host
764  // inode is the local node index to get temperature
765  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const{
766  return thermal_velocity(mass, f0.temp_ev_h(inode));
767  }
768 
769  KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const{
770  return thermal_velocity(mass, f0.fg_temp_ev_h(inode));
771  }
772 
773  // Get species velocity
774  KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid<DeviceType>& grid, const MagneticField<DeviceType>& magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, SimdParticles& part, Simd<double>& smu, Simd<double>& vp, Simd<int>& nearest_node, Simd<bool>& not_in_triangle, Simd<bool>& not_in_poloidal_domain) const{
775 
776  // This modulo surely doesnt need to be here (at least, should be elsewhere).
777  // Modulo phi coordinate
778  grid.wedge_modulo_phi(part.ph.phi);
779 
781  grid.get_grid_weights(magnetic_field, part.ph.v(), grid_wts0);
782 
783  // Output argument
784  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
785  not_in_triangle[i_simd] = !grid_wts0.is_valid(i_simd);
786  }
787 
788  Simd<double> bmag;
789  magnetic_field.bmag_interpol(part.ph.v(), bmag);
790 
791  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
792  if(!grid_wts0.is_valid(i_simd)) continue;
793 
794  nearest_node[i_simd]=grid_wts0.node[i_simd] - pol_decomp.node_offset;
795  not_in_poloidal_domain[i_simd] = (nearest_node[i_simd]<0 || nearest_node[i_simd]>=pol_decomp.nnodes);
796 
797  double temp_ev_norm = not_in_poloidal_domain[i_simd] ? f0.fg_temp_ev(0) : f0.fg_temp_ev(nearest_node[i_simd]);
798 
799  // get vp and smu
800  const double& B = bmag[i_simd];
801  vp[i_simd] = normalized_v_para(c_m, mass, B, temp_ev_norm, part.ph.rho[i_simd]);
802  smu[i_simd] = normalized_sqrt_mu(B, temp_ev_norm, part.ct.mu[i_simd]);
803  }
804  }
805 
806  // get flow in m/s according to eq_flow_type
807  KOKKOS_INLINE_FUNCTION double eq_flow_ms(const MagneticField<DeviceType> &magnetic_field, double psi_in,double r,double z, double bphi_over_b) const {
808 
809  double flow = eq_flow.value(magnetic_field,psi_in,r,z);
810  flow=(eq_flow_type>=1)? flow*r : flow;
811  flow=(eq_flow_type>=2)? flow*bphi_over_b : flow ;
812 
813  return flow;
814  }
815 
816  // Return tr_save if available, otherwise return -1
817  KOKKOS_INLINE_FUNCTION void get_tr_save(int i_item, Simd<int>& itr) const{
818  int ibase = i_item*SIMD_SIZE;
819  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
820  int i = ibase + i_simd;
821  itr[i_simd] = i<tr_save.extent(0) ? (tr_save(i)+1) : -1; // tr_save is zero-indexed
822  }
823  }
824 
826  const Grid<DeviceType>& grid,
828  const VelocityGrid& vgrid);
829 
832 
833  void write_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
834  void write_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
835  void read_f0_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name);
836  void read_ptl_checkpoint_files(const DomainDecomposition<DeviceType>& pol_decomp, const XGC_IO_Stream& stream, std::string sp_name, bool n_ranks_is_same, int version);
838 
839  long long int get_max_gid() const;
840  void get_ptl_write_total_and_offsets(const DomainDecomposition<DeviceType>& pol_decomp, long long int& inum_total, long long int& ioff) const;
841 };
842 
843 #endif
KOKKOS_INLINE_FUNCTION double thermal_velocity(double mass, double temp_ev)
Definition: basic_physics.hpp:75
KOKKOS_INLINE_FUNCTION double normalized_v_para(double c_m, double mass, double B, double temp_ev, double rho)
Definition: basic_physics.hpp:97
KOKKOS_INLINE_FUNCTION double normalized_sqrt_mu(double B, double temp_ev, double mu)
Definition: basic_physics.hpp:107
int nnodes
Number of nodes belonging to this MPI rank.
Definition: domain_decomposition.hpp:96
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:95
Definition: profile.hpp:171
KOKKOS_INLINE_FUNCTION void wedge_modulo_phi(Simd< double > &phi_mod) const
Definition: grid.tpp:100
KOKKOS_INLINE_FUNCTION void get_grid_weights(const MagneticField< Device > &magnetic_field, const SimdVector &v, const Simd< double > &psi, SimdVector2D &xff, SimdGridWeights< Order::One, PIT > &grid_wts) const
Definition: grid.tpp:32
Definition: gyro_avg_mat.hpp:18
Definition: magnetic_field.hpp:12
Definition: NamelistReader.hpp:199
Definition: particles.hpp:248
Definition: species.hpp:63
int n_backup_particles
Definition: species.hpp:114
void read_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name, bool n_ranks_is_same, int version)
Definition: species.cpp:671
Eq::Profile< Device > eq_fg_temp
Definition: species.hpp:129
void read_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:766
Eq::Profile< Device > eq_den
Definition: species.hpp:125
double c2_2m
c2/2m
Definition: species.hpp:75
void copy_phase0_to_device_if_not_resident()
Definition: species.hpp:620
Particles::Array< PhaseDataTypes, Device > phase0_d
Definition: species.hpp:109
void restore_particles_from_backup()
Definition: species.hpp:653
void resize_device_particles(int new_n_ptl)
Definition: species.hpp:298
int nonadiabatic_idx
Index of species skipping adiabatic species (for compatibility with fortran arrays)
Definition: species.hpp:69
bool backup_particles_on_device
Definition: species.hpp:115
int get_max_n_ptl()
Definition: species.hpp:731
double c_m
c/m
Definition: species.hpp:74
long long int get_max_gid() const
Definition: species.cpp:573
View< int *, CLayout, Device > tr_save
Definition: species.hpp:118
KOKKOS_INLINE_FUNCTION VecPhase * ph0() const
Definition: species.hpp:595
KOKKOS_INLINE_FUNCTION double get_f0_fg_unit_velocity_lnode_h(int inode) const
Definition: species.hpp:769
int ncycles
Number of subcycles.
Definition: species.hpp:87
void resize_host_particles_to_match_device()
Definition: species.hpp:260
Particles::Array< ParticleDataTypes, Device > particles_d
Particles on device.
Definition: species.hpp:95
void restore_backup_SoA(Particles::Array< ParticleDataTypes, Device > &backup_SoA, int offset, int n) const
Definition: species.hpp:457
int collision_grid_index
Which collision grid to use.
Definition: species.hpp:122
bool particles_are_backed_up
Whether particles are currently backed up.
Definition: species.hpp:105
Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, int ncycles_in)
Definition: species.cpp:11
int idx
Index in all_species.
Definition: species.hpp:66
int n_ptl
Number of particles.
Definition: species.hpp:91
void save_backup_particles()
Definition: species.hpp:627
void set_buffer_particles_d()
Definition: species.hpp:370
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const
Definition: species.hpp:753
Particles::Array< ParticleDataTypes, HostType > backup_particles
Copy of particles to be restored for RK2.
Definition: species.hpp:112
double charge_eu
Particle charge in eu.
Definition: species.hpp:73
Eq::Profile< Device > eq_flow
Definition: species.hpp:126
Eq::Profile< Device > eq_mk_den
Definition: species.hpp:134
Species(SpeciesType sp_type, int n_ptl)
Definition: species.hpp:149
bool particles_resident_on_device
Whether the particles can reside on device.
Definition: species.hpp:99
MarkerType marker_type
Marker type: reduced delta-f, total-f, full-f, or none (placeholder for adiabatic species)
Definition: species.hpp:77
void copy_to_phase0(Species< Device > &species)
Definition: species.hpp:599
double charge
Particle charge.
Definition: species.hpp:72
int eq_flow_type
Definition: species.hpp:127
Eq::Profile< Device > eq_fg_flow
Definition: species.hpp:130
GyroAverageMatrices< Device > gyro_avg_matrices
Definition: species.hpp:139
bool maxwellian_init
whether initial distribution is maxwellian
Definition: species.hpp:85
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const
Definition: species.hpp:765
int eq_fg_flow_type
Definition: species.hpp:131
KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices< Device > &inds, SimdParticles &part_one) const
Definition: species.hpp:707
int ncycles_between_sorts
Number of subcycles between sorts.
Definition: species.hpp:88
int minimum_ptl_reservation
The minimum reservation size for particles.
Definition: species.hpp:90
bool owns_particles_d
Whether the species owns the device particle allocation right now.
Definition: species.hpp:97
double mass
Particle mass.
Definition: species.hpp:71
RKRestorationMethod RK_restoration_method
Currently, electrons must use first method and ions must use second.
Definition: species.hpp:103
KOKKOS_INLINE_FUNCTION double eq_flow_ms(const MagneticField< DeviceType > &magnetic_field, double psi_in, double r, double z, double bphi_over_b) const
Definition: species.hpp:807
KOKKOS_INLINE_FUNCTION void get_tr_save(int i_item, Simd< int > &itr) const
Definition: species.hpp:817
KOKKOS_INLINE_FUNCTION VecParticles * ptl() const
Definition: species.hpp:591
Eq::Profile< Device > eq_mk_temp
Definition: species.hpp:133
LaunchBounds
Definition: species.hpp:412
void copy_particles_to_device_if_not_resident()
Definition: species.hpp:353
void copy_particles_from_device_if_not_resident()
Definition: species.hpp:359
bool is_adiabatic
Whether this species is adiabatic.
Definition: species.hpp:68
bool stream_particles
Whether to stream particles between host and device if possible.
Definition: species.hpp:100
void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const
Definition: species.hpp:490
KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const DomainDecomposition< DeviceType > &pol_decomp, SimdParticles &part, Simd< double > &smu, Simd< double > &vp, Simd< int > &nearest_node, Simd< bool > &not_in_triangle, Simd< bool > &not_in_poloidal_domain) const
Definition: species.hpp:774
Particles::Array< ParticleDataTypes, HostType > particles
Particles.
Definition: species.hpp:92
long long int get_total_n_ptl()
Definition: species.hpp:720
static int get_initial_n_ptl(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx, bool verbose)
Definition: species.cpp:100
void resize_device_particles()
Definition: species.hpp:283
void copy_particles_to_device()
Definition: species.hpp:312
void for_all_particles(const std::string label, F lambda_func) const
Definition: species.hpp:423
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const
Definition: species.hpp:759
void copy_particles_from_device_if_resident()
Definition: species.hpp:347
void back_up_SoA(Particles::Array< ParticleDataTypes, Device > &backup_SoA, int offset, int n) const
Definition: species.hpp:428
void update_decomposed_f0_calculations(const DomainDecomposition< DeviceType > &pol_decomp, const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const VelocityGrid &vgrid)
Definition: species.cpp:434
KOKKOS_INLINE_FUNCTION double get_fg_gyro_radius(int inode, double smu_n, double bfield) const
Definition: species.hpp:746
void clear_backup_phase()
Definition: species.hpp:700
Distribution< Device > f0
Species distribution in velocity space on local mesh nodes.
Definition: species.hpp:120
Eq::Profile< Device > eq_mk_flow
Definition: species.hpp:135
void move_phase0_from_device_if_not_resident()
Definition: species.hpp:691
static std::vector< MemoryPrediction > estimate_memory_usage(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx)
Definition: species.cpp:45
void set_buffer_phase0_d()
Definition: species.hpp:399
WeightEvoEq weight_evo_eq
Definition: species.hpp:79
bool phase0_is_stored() const
Definition: species.hpp:616
KinType kintype
Whether the species is gyrokinetic or drift kinetic.
Definition: species.hpp:70
FAnalyticShape f_analytic_shape
f_analytic_shape shape: Maxwellian, SlowingDown or None
Definition: species.hpp:78
Eq::Profile< Device > eq_temp
Definition: species.hpp:124
bool is_electron
Whether this species is the electrons.
Definition: species.hpp:67
void write_f0_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:730
void initialize_global_f0_arrays(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field)
Definition: species.cpp:500
Species(int n_ptl_in)
Definition: species.hpp:185
void for_all_particles(const std::string label, F lambda_func, const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default)
Definition: species.hpp:541
void unassign_host_particles()
Definition: species.hpp:277
Particles::Array< PhaseDataTypes, HostType > phase0
Definition: species.hpp:108
void copy_particles_from_device()
Definition: species.hpp:328
void copy_particles_to_device_if_resident()
Definition: species.hpp:341
Particles::Array< ParticleDataTypes, Device > backup_particles_d
Copy of particles to be restored for RK2.
Definition: species.hpp:113
void resize_particles(int new_n_ptl)
Definition: species.hpp:240
bool dynamic_f0
Whether f0 can evolve in time.
Definition: species.hpp:84
int eq_mk_flow_type
Definition: species.hpp:136
void get_ptl_write_total_and_offsets(const DomainDecomposition< DeviceType > &pol_decomp, long long int &inum_total, long long int &ioff) const
Definition: species.cpp:596
void write_ptl_checkpoint_files(const DomainDecomposition< DeviceType > &pol_decomp, const XGC_IO_Stream &stream, std::string sp_name)
Definition: species.cpp:621
void read_initial_distribution(NLReader::NamelistReader &nlr, const DomainDecomposition< DeviceType > &pol_decomp)
Definition: species.cpp:803
Definition: xgc_io.hpp:24
constexpr double EV_2_J
Conversion rate ev to J.
Definition: constants.hpp:5
constexpr double UNIT_CHARGE
Charge of an electron (C)
Definition: constants.hpp:4
constexpr double PROTON_MASS
Definition: constants.hpp:7
void exit_XGC(std::string msg)
Definition: globals.hpp:38
@ PTL_NPHASE
Definition: globals.hpp:235
FAnalyticShape
Definition: globals.hpp:139
SpeciesType
Definition: globals.hpp:106
@ ELECTRON
Definition: globals.hpp:107
KOKKOS_INLINE_FUNCTION int divide_and_round_up(int a, int b)
Definition: globals.hpp:249
WeightEvoEq
Definition: globals.hpp:144
KinType
Definition: globals.hpp:111
@ GyroKin
Definition: globals.hpp:113
@ DriftKin
Definition: globals.hpp:112
@ PTL_NCONST
Definition: globals.hpp:245
MarkerType
Definition: globals.hpp:133
int SML_COMM_RANK
Definition: my_mpi.cpp:5
MPI_Comm SML_COMM_WORLD
Definition: my_mpi.cpp:4
void deep_copy(const Array< DataType, Device > &dest, const Array< DataType, Device2 > &src)
Definition: particles.hpp:310
Option
Definition: streamed_parallel_for.hpp:13
@ NoReturn
Definition: streamed_parallel_for.hpp:16
@ NoSend
Definition: streamed_parallel_for.hpp:14
@ Normal
Definition: streamed_parallel_for.hpp:15
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Definition: magnetic_field.F90:1
logical false
Definition: module.F90:103
logical true
Definition: module.F90:103
long long int add_vec_buffer(T n_ptl)
Definition: particles.hpp:190
int p_range< DeviceType >(int num_particle)
Definition: particles.hpp:182
constexpr static const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t Async
Definition: space_settings.hpp:83
void set_min_max_num(int isp, int n_ptl)
bool default_residence_option()
Definition: species.hpp:32
RKRestorationMethod
Definition: species.hpp:56
@ RestoreOnOriginalRank
Definition: species.hpp:57
@ BringOldPhaseToNewRank
Definition: species.hpp:58
bool default_streaming_option()
Definition: species.hpp:25
void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles *ptl)
void adjust_n_ptl_for_core_ptl(int *n_ptl)
Definition: particles.hpp:143
int s
The index in the outer array of the AoSoA.
Definition: particles.hpp:144
int a
The index in the inner array of the AoSoA.
Definition: particles.hpp:145
Definition: distribution.hpp:11
Definition: species.hpp:36
SendOpt send_opt
Definition: species.hpp:50
ReturnOpt return_opt
Definition: species.hpp:51
SendOpt
Definition: species.hpp:38
@ NoSend
Definition: species.hpp:39
@ SendIfNotResident
Definition: species.hpp:41
@ Send
Definition: species.hpp:40
PtlMvmt(SendOpt send_opt, ReturnOpt return_opt)
Definition: species.hpp:53
ReturnOpt
Definition: species.hpp:44
@ ReturnIfNotResident
Definition: species.hpp:47
@ Return
Definition: species.hpp:46
@ NoReturn
Definition: species.hpp:45
Simd< double > mu
m*v_perp^2/(2B)
Definition: particles.hpp:41
Definition: grid_weights.hpp:47
Definition: particles.hpp:50
SimdPhase ph
Definition: particles.hpp:51
SimdConstants ct
Definition: particles.hpp:52
Simd< double > w2
(1 - background distribution)/f0
Definition: particles.hpp:12
Simd< double > w1
delta-f weight
Definition: particles.hpp:11
Simd< double > rho
m*v_para/(q*B) - A_para^h/B (should it be plus or minus?)
Definition: particles.hpp:10
Simd< double > z
Cylindrical coordinate Z.
Definition: particles.hpp:8
Simd< double > r
Cylindrical coordinate R (major radial direction)
Definition: particles.hpp:7
Simd< double > phi
Cylindrical coordinate phi (toroidal direction)
Definition: particles.hpp:9
KOKKOS_INLINE_FUNCTION SimdVector & v()
Definition: particles.hpp:28
Definition: particles.hpp:104
VecPhase ph
Definition: particles.hpp:105
Definition: particles.hpp:84
double r[VEC_LEN]
Definition: particles.hpp:85
double w2[VEC_LEN]
Definition: particles.hpp:90
double phi[VEC_LEN]
Definition: particles.hpp:87
double rho[VEC_LEN]
Definition: particles.hpp:88
double w1[VEC_LEN]
Definition: particles.hpp:89
double z[VEC_LEN]
Definition: particles.hpp:86
Definition: velocity_grid.hpp:8
#define TIMER(N, F)
Definition: timer_macro.hpp:24