XGC1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
species.hpp
Go to the documentation of this file.
1 #ifndef SPECIES_HPP
2 #define SPECIES_HPP
3 #include <Cabana_AoSoA.hpp>
4 #include <Cabana_DeepCopy.hpp>
5 #include <Kokkos_Core.hpp>
6 #include "NamelistReader.hpp"
7 #include "timer_macro.hpp"
8 #include "magnetic_field.hpp"
9 #include "grid.hpp"
10 #include "domain_decomposition.hpp"
11 #include "particles.hpp"
12 #include "space_settings.hpp"
13 #include "distribution.hpp"
14 #include "profile.hpp"
15 #include "gyro_avg_mat.hpp"
17 #include "basic_physics.hpp"
18 #include "memory_prediction.hpp"
19 
20 extern "C" void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles* ptl);
21 extern "C" void set_min_max_num(int isp, int n_ptl);
22 extern "C" void adjust_n_ptl_for_core_ptl(int* n_ptl);
23 
28  return false;
29 }
30 
35  return false;
36 }
37 
38 // Used for Cabana slices (getting one particle property at a time)
39 namespace PtlSlice{
40 #ifdef ESC_PTL
41 enum{Ph=0,Ct,Gid,Flag};
42 #else
43 enum{Ph=0,Ct,Gid};
44 #endif
45 }
46 
47 struct PtlMvmt{
48  // Options for particle location management when looping over particles
49  enum SendOpt{
50  NoSend=0,
53  };
54 
55  enum ReturnOpt{
59  };
60 
63 
64  PtlMvmt(SendOpt send_opt, ReturnOpt return_opt) : send_opt(send_opt), return_opt(return_opt){}
65 };
66 
70 };
71 
72 // Species class
73 template<class Device>
74 class Species{
75  public:
76 
77  int idx;
78  bool is_electron;
79  bool is_adiabatic;
82  double mass;
83  double charge;
84  double charge_eu;
85  double c_m;
86  double c2_2m;
87 
88  bool is_deltaf;
89 
90  int ncycles;
92 
94  int n_ptl;
95  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> particles;
96 
97  // Device particles
98  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> particles_d;
99 
101 
104 
105  /*** Could be its own class inside species? ***/
109 
110  // phase0 (for ion restoration)
111  Cabana::AoSoA<PhaseDataTypes,HostType,VEC_LEN> phase0;
112  Cabana::AoSoA<PhaseDataTypes,Device,VEC_LEN> phase0_d;
113 
114  // For electron restoration
115  Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN> backup_particles;
116  int n_backup_particles; // Number of particles stored in backup_particles (can't be deduced from its size due to buffer)
117  /****/
118 
120 
122 
123  Eq::Profile<Device> eq_temp; // Equilibrium temperature
124  Eq::Profile<Device> eq_den; // Equilibrium density
125  Eq::Profile<Device> eq_flow; // Equilibrium flow
126  int eq_flow_type; // Type of Equilibirum flow
127 
129 
130  /*** Constructors ***/
131 
132  Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, bool is_deltaf_in,
133  int ncycles_in);
134 
135  Species(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const MagneticField<DeviceType> &magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, int idx_in, int nonadiabatic_idx_in);
136 
137  // Electron or ion default constructor
138  Species(SpeciesType sp_type, int n_ptl)
139  : idx(sp_type==ELECTRON ? 0 : 1),
140  is_electron(sp_type==ELECTRON),
141  mass(is_electron ? 3.344e-30 : PROTON_MASS),
143  charge_eu(is_electron ? -1.0 : 1.0),
144  is_deltaf(true),
145  is_adiabatic(false),
146  nonadiabatic_idx(idx), // Since is_adiabatic is false above
148  ncycles(is_electron ? 70 : 1),
149  c_m(charge/mass),
150  c2_2m(0.5*charge*charge/mass),
152  n_ptl(n_ptl),
153  backup_particles("backup_particles", 0),
154  particles("particles", add_vec_buffer(n_ptl)),
157  eq_temp(1.0e3,-0.1),
158  eq_den(1.0e19,-0.1),
159  eq_flow_type(2),
160  owns_particles_d(false),
164 
165  // Special constructor for tests that involve tracking particles in memory
166  // The idea is to use these particles to test functions that reorder particles,
167  // e.g. sort, shift, and cleaning
168  Species(int n_ptl_in)
169  : n_ptl(n_ptl_in),
170  is_electron(true),
171  is_adiabatic(false),
172  particles("particles", add_vec_buffer(n_ptl_in)),
174  owns_particles_d(false),
179  {
180 
181  // Slice particle properties
182  auto ph = Cabana::slice<PtlSlice::Ph>(particles);
183  auto ct = Cabana::slice<PtlSlice::Ct>(particles);
184  auto gid = Cabana::slice<PtlSlice::Gid>(particles);
185 #ifdef ESC_PTL
186  auto flag = Cabana::slice<PtlSlice::Flag>(particles);
187 #endif
188 
189  // Offset gid if using MPI
190 #ifdef USE_MPI
191  long long int gid_offset = n_ptl*SML_COMM_RANK;
192 #else
193  long long int gid_offset = 0;
194 #endif
195 
196  // Assign trackable values
197  for (int i=0;i<n_ptl;i++){
198  // Set GID in order
199  gid(i) = gid_offset + i+1; // 1-indexed
200 
201  // Value of properties is gid + 0.1*(property index)
202  // First particle: (1.0, 1.1, ... 1.8)
203  // Second particle: (2.0, 2.1, ... 2.8)
204  for (int j=0;j<6;j++) ph(i, j) = gid(i) + (j)*0.1;
205  for (int j=0;j<3;j++) ct(i, j) = gid(i) + (j+6)*0.1;
206  }
207 
208  // Buffer particles: same but with gid = -1
209  if(n_ptl>0){
210  for (int i=n_ptl;i<add_vec_buffer(n_ptl);i++){
211  gid(i) = -1;
212  for (int j=0;j<6;j++) ph(i, j) = gid(i) + (j)*0.1;
213  for (int j=0;j<3;j++) ct(i, j) = gid(i) + (j+6)*0.1;
214  }
215  }
216  }
217 
218  static std::vector<MemoryPrediction> estimate_memory_usage(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int species_idx);
219 
220  static int get_initial_n_ptl(NLReader::NamelistReader& nlr, const Grid<DeviceType> &grid, const DomainDecomposition<DeviceType>& pol_decomp, int sml_special, int species_idx, bool verbose);
221 
222  void resize_particles(int new_n_ptl){
223  n_ptl = new_n_ptl;
224 
225 #ifndef USE_GPU
226  // If CPU-only, particles_d points to the same location as particles. If particles is resized, then Cabana will not deallocate the first allocation
227  // since it is still used by particles_d. So, reset particles_d before resize, and point it back to particles only afterwards
228  particles_d = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>();
229 #endif
230 
231  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
232  particles.resize(add_vec_buffer(n_ptl));
233 
234 #ifndef USE_GPU
235  // Point particles_d back to particles after resize
237 #endif
238 
240  }
241 
243 #ifdef USE_GPU
244  particles.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
245  particles.resize(particles_d.size());
246 #else
247  // Point particles back to particles_d after resize
249 #endif
250 
252  }
253 
254  /* If using CPU-only, then "device" particles are a shallow copy of host particles so that
255  * there is no unnecessary duplication. When "device" particles are resized, Cabana will keep the
256  * original allocation if there is a second reference (i.e. host particles).
257  * To resolve this, we free the host particles here so that there is no second reference
258  * */
260  particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>();
261  }
262 
263  /* Resizes device particles if on GPU, or just creates a shallow copy if CPU only
264  * */
266  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
267 
268 #ifdef USE_GPU
269  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
270  // Resize device particles to match n particles
272 #else
273  // If kernels are on CPU, do shallow copy
275 #endif
276  }
277 
278  /* Resizes device particles
279  * */
280  void resize_device_particles(int new_n_ptl){
281  if(!owns_particles_d) exit_XGC("\nSpecies tried to resize device particles, but doesn't own the device array.");
282 
283  n_ptl = new_n_ptl;
284 
285  particles_d.reserve(minimum_ptl_reservation); // Can only raise reservation (no-op if AoSoA is already larger)
286 
287  // Resize device particles to match host particles
289  }
290 
291  /* Copies particles to device - deep copy if using GPU, otherwise shallow copy
292  * Also takes the opportunity to set the buffer particles to realistic values
293  * */
295  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles to device, but doesn't own the device array.");
296 
297 #ifdef PRINT_COPA_ARTIFACTS
298  if(is_rank_zero()) printf("\nCopying Cabana AoSoA particle data to GPU with Cabana::deep_copy\n");
299 #endif
300 
301 #ifdef USE_GPU
302  // Copy to device
303  Cabana::deep_copy(particles_d, particles);
304 #else
305  // No operation required if CPU-only
306 #endif
307 
308  // Copy last particle to fill remainder of trailing vector in AoSoA
310  }
311 
312  /* Copies particles from device - deep copy if using GPU, otherwise no copy is necessary
313  * */
315  if(!owns_particles_d) exit_XGC("\nSpecies tried to copy particles from device, but doesn't own the device array.");
316 
317 #ifdef PRINT_COPA_ARTIFACTS
318  if(is_rank_zero()) printf("\nCopying Cabana AoSoA particle data from GPU with Cabana::deep_copy\n");
319 #endif
320 
321 #ifdef USE_GPU
322  // Copy particles to host
323  Cabana::deep_copy(particles, particles_d);
324 #else
325  // No operation required if CPU-only
326 #endif
327  }
328 
329  /* Copies particles to device if they are resident on the device
330  * */
333  }
334 
335  /* Copies particles from device if they are resident on the device
336  * */
339  }
340 
341  /* Copies particles to device if they are NOT resident on the device
342  * */
345  }
346 
347  /* Copies particles from device if they are NOT resident on the device
348  * */
351  }
352 
361  if (n_ptl>0){
362  int last_ptl_index = n_ptl - 1;
363  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
364  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
365  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
366 #ifdef ESC_PTL
367  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
368 #endif
369 
370  Kokkos::parallel_for("set_buffer_particles_d", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
371  // Buffer particles: same as last particle, gid = -1
372  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
373  for (int j=0;j<3;j++) ct(i, j) = ct(last_ptl_index, j);
374  gid(i) = -1;
375 #ifdef ESC_PTL
376  flag(i) = flag(last_ptl_index);
377 #endif
378  });
379  }
380  }
381 
390  if (n_ptl>0){
391  int last_ptl_index = n_ptl - 1;
392  auto ph = Cabana::slice<PtlSlice::Ph>(phase0_d);
393 
394  Kokkos::parallel_for("set_buffer_phase0", Kokkos::RangePolicy<ExSpace>( n_ptl, add_vec_buffer(n_ptl) ), KOKKOS_LAMBDA( const int i ){
395  // copy final real particle
396  for (int j=0;j<6;j++) ph(i, j) = ph(last_ptl_index, j);
397  });
398  }
399  }
400 
401  // Options for custom launch bounds since kokkos defaults are suboptimal for electron push kernel
402  enum class LaunchBounds{
403  Default,
404  Custom
405  };
406 
412  template<typename F>
413  inline void for_all_particles(const std::string label, F lambda_func) const {
414 #ifdef PRINT_COPA_ARTIFACTS
415  if(is_rank_zero()) printf("\nLaunching GPU kernel '%s' on Cabana AoSoA of particles\n", label.c_str());
416 #endif
417  Kokkos::RangePolicy<ExSpace> particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
418  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
419  }
420 
421  inline void back_up_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
422  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
423  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
424  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
425 #ifdef ESC_PTL
426  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
427 #endif
428 
429  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
430  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
431  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
432 #ifdef ESC_PTL
433  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
434 #endif
435 
436  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
437  int i_offset = i + offset;
438  // Make backup copy
439  for (int j=0;j<6;j++) ph_b(i, j) = ph(i_offset, j);
440  for (int j=0;j<3;j++) ct_b(i, j) = ct(i_offset, j);
441  gid_b(i) = gid(i_offset);
442 #ifdef ESC_PTL
443  flag_b(i) = flag(i_offset);
444 #endif
445  // Deactivate
446  gid(i_offset) = -1;
447  });
448  }
449 
450  inline void restore_backup_SoA(Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>& backup_SoA, int offset, int n) const{
451  auto ph_b = Cabana::slice<PtlSlice::Ph>(backup_SoA);
452  auto ct_b = Cabana::slice<PtlSlice::Ct>(backup_SoA);
453  auto gid_b = Cabana::slice<PtlSlice::Gid>(backup_SoA);
454 #ifdef ESC_PTL
455  auto flag_b = Cabana::slice<PtlSlice::Flag>(backup_SoA);
456 #endif
457 
458  auto ph = Cabana::slice<PtlSlice::Ph>(particles_d);
459  auto ct = Cabana::slice<PtlSlice::Ct>(particles_d);
460  auto gid = Cabana::slice<PtlSlice::Gid>(particles_d);
461 #ifdef ESC_PTL
462  auto flag = Cabana::slice<PtlSlice::Flag>(particles_d);
463 #endif
464 
465  Kokkos::parallel_for("backup_first_soa", Kokkos::RangePolicy<ExSpace>( 0, n ), KOKKOS_LAMBDA( const int i ){
466  int i_offset = i + offset;
467  // Restore from backup copy
468  for (int j=0;j<6;j++) ph(i_offset, j) = ph_b(i, j);
469  for (int j=0;j<3;j++) ct(i_offset, j) = ct_b(i, j);
470  gid(i_offset) = gid_b(i);
471 #ifdef ESC_PTL
472  flag(i_offset) = flag_b(i);
473 #endif
474  });
475  }
476 
482  template<typename F>
483  inline void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const {
484  if(end_idx <= begin_idx) return; // Return if range is 0 or less
485 
486  // Still need the subset to line up with the AoSoA vector length
487  int first_soa = begin_idx/VEC_LEN;
488  int n_other_ptl_in_first_soa = begin_idx - first_soa*VEC_LEN;
489  bool first_soa_is_partial = (n_other_ptl_in_first_soa>0);
490  int last_soa = (end_idx-1)/VEC_LEN;
491  int n_other_ptl_in_last_soa = (last_soa+1)*VEC_LEN - end_idx;
492  bool last_soa_is_partial = (n_other_ptl_in_last_soa>0);
493 
494 #ifdef USE_GPU
495  int first_item_in_shifted_range = first_soa*VEC_LEN;
496 #else
497  int first_item_in_shifted_range = first_soa;
498 #endif
499 
500  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_first_soa;
501  Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN> ptl_last_soa;
502  if(first_soa_is_partial){
503  // Make a backup of the first SoA and set the particles_d GIDs to -1
504  ptl_first_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_first_soa", n_other_ptl_in_first_soa);
505  back_up_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
506  }
507  if(last_soa_is_partial){
508  // Make a backup of the last SoA and set the particles_d GIDs to -1
509  ptl_last_soa = Cabana::AoSoA<ParticleDataTypes,Device,VEC_LEN>("ptl_last_soa", n_other_ptl_in_last_soa);
510  back_up_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
511  }
512 
513  // Finally, do the parallel_for
514  Kokkos::RangePolicy<ExSpace> particle_range_policy( first_item_in_shifted_range, p_range<DeviceType>(end_idx) );
515  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
516 
517  // Restore from backup
518  if(first_soa_is_partial){
519  restore_backup_SoA(ptl_first_soa, first_soa*VEC_LEN, n_other_ptl_in_first_soa);
520  }
521  if(last_soa_is_partial){
522  restore_backup_SoA(ptl_last_soa, end_idx, n_other_ptl_in_last_soa);
523  }
524  }
525 
533  template<typename F>
534  inline void for_all_particles(const std::string label, F lambda_func,
535  const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default) {
536  if(!owns_particles_d) exit_XGC("\nSpecies tried to loop over particles on device, but doesn't own the device array.");
537 
538 #ifdef PRINT_COPA_ARTIFACTS
539  if(is_rank_zero()) printf("\nLaunching GPU kernel '%s' on Cabana AoSoA of particles\n", label.c_str());
540 #endif
541 
542  bool use_streaming = stream_particles;
543 #ifndef USE_STREAMS
544  use_streaming = false; // Just to be safe, turn streams off here
545 #endif
546 
547  bool send_ptl = ( mvmt.send_opt==PtlMvmt::Send ||
549  bool return_ptl = ( mvmt.return_opt==PtlMvmt::Return ||
551 
552  // Don't need to stream if particles are already present and don't need to be returned
553  if((!send_ptl) && (!return_ptl)) use_streaming = false;
554 
555  if(use_streaming){
556 #ifdef USE_STREAMS
557  Streamed::Option stream_option = Streamed::Normal; // Send to device and back
558  if(!send_ptl) stream_option = Streamed::NoSend;
559  if(!return_ptl) stream_option = Streamed::NoReturn;
560 
561  // Execute streaming parallel_for
562  Streamed::parallel_for(label, n_ptl, lambda_func, stream_option, particles, particles_d);
563 #endif
564  }else{
565  if(send_ptl) TIMER("copy_ptl_to_device",copy_particles_to_device() );
566 
567  if (launch_bounds==LaunchBounds::Custom) {
568 #ifdef USE_EPUSH_LAUNCH_BOUNDS
569 # if !defined(PUSH_MAX_THREADS_PER_BLOCK) || !defined(PUSH_MIN_WARPS_PER_EU)
570 # error "USE_EPUSH_LAUNCH_BOUNDS requires PUSH_MAX_THREADS_PER_BLOCK and PUSH_MIN_WARPS_PER_EU to be defined"
571 # endif
572  Kokkos::RangePolicy<ExSpace, Kokkos::LaunchBounds<PUSH_MAX_THREADS_PER_BLOCK, PUSH_MIN_WARPS_PER_EU>>
573  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
574  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
575 #else
576  exit_XGC("\nERROR: LaunchBounds::Custom specified, but USE_EPUSH_LAUNCH_BOUNDS is not defined\n");
577 #endif
578  } else {
579  Kokkos::RangePolicy<ExSpace>
580  particle_range_policy( 0, p_range<DeviceType>(n_ptl) );
581  Kokkos::parallel_for(label, Opt::require(particle_range_policy, Async), lambda_func);
582  }
583 
584  if(return_ptl) TIMER("copy_ptl_from_device", copy_particles_from_device() );
585  }
586  }
587 
588  KOKKOS_INLINE_FUNCTION VecParticles* ptl() const{
589  return (VecParticles*)(&particles_d.access(0));
590  }
591 
592  KOKKOS_INLINE_FUNCTION VecPhase* ph0() const{
593  return (VecPhase*)(&phase0_d.access(0));
594  }
595 
597  for_all_particles("copy_to_phase0", KOKKOS_LAMBDA( const int idx ){
598  AoSoAIndices<DeviceType> inds(idx);
599  VecParticles* ptl_loc = species.ptl();
600  VecPhase* ph0_loc = species.ph0();
601  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
602  int p_vec = inds.a + i_simd;
603  ph0_loc[inds.s].r[p_vec] = ptl_loc[inds.s].ph.r[p_vec];
604  ph0_loc[inds.s].z[p_vec] = ptl_loc[inds.s].ph.z[p_vec];
605  ph0_loc[inds.s].phi[p_vec] = ptl_loc[inds.s].ph.phi[p_vec];
606  ph0_loc[inds.s].rho[p_vec] = ptl_loc[inds.s].ph.rho[p_vec];
607  ph0_loc[inds.s].w1[p_vec] = ptl_loc[inds.s].ph.w1[p_vec];
608  ph0_loc[inds.s].w2[p_vec] = ptl_loc[inds.s].ph.w2[p_vec];
609  }
610  });
611  }
612 
615  // If particles are resident on the device, then use the host particle allocation as the backup
616  // Otherwise, resize the backup_particle array
619  }else{
620  backup_particles.resize(particles_d.size());
621  }
622 
623  // Copy particle data to backup
624  Cabana::deep_copy(backup_particles, particles_d);
626  } else {
627  // For ions, save phase to phase0
628  phase0_d.resize(particles_d.size());
629  copy_to_phase0(*this); // Separate function to avoid implicit copy into lambda
630  }
632  }
633 
636  // If particles are resident on device, don't need to resize host particles since they are already used as the backup.
637  // If not, resize host particle array to fit backup particles
641  }else{
642  // If particles are not resident on device, resize host particle array to fit backup particles
644  }
645 
646  // Resize device particle array to fit backed up particles
648 
649  // Restore particle data from backup
650  Cabana::deep_copy(particles_d, backup_particles);
651 
653  // If the backup particles are simply pointing to the host particles, then
654  // point them to their own 0-sized allocation when finished using them
655  // so that they don't make a copy when host particles get resized
656  backup_particles = Cabana::AoSoA<ParticleDataTypes,HostType,VEC_LEN>("backup_particles", 0);
657  }
658  } else {
659  // When ipc==2, copy phase0 to device
660  // First, resize device phase0 and reset pointer
661  phase0_d.resize(phase0.size());
662 
663  // Next copy data to phase0 on device
664  Cabana::deep_copy(phase0_d, phase0);
665 
666  // Fill buffer with realistic ptl data
668  }
669  particles_are_backed_up = false;
670  }
671 
672  KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices<Device>& inds, SimdParticles& part_one) const {
673  VecPhase* ph0_loc = ph0();
674  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
675  int p_vec = inds.a + i_simd;
676  part_one.ph.r[i_simd] = ph0_loc[inds.s].r[p_vec];
677  part_one.ph.z[i_simd] = ph0_loc[inds.s].z[p_vec];
678  part_one.ph.phi[i_simd] = ph0_loc[inds.s].phi[p_vec];
679  part_one.ph.rho[i_simd] = ph0_loc[inds.s].rho[p_vec];
680  part_one.ph.w1[i_simd] = ph0_loc[inds.s].w1[p_vec];
681  part_one.ph.w2[i_simd] = ph0_loc[inds.s].w2[p_vec];
682  }
683  }
684 
685  long long int get_total_n_ptl(){
686 #ifdef USE_MPI
687  long long int tmp_n_ptl = n_ptl;
688  long long int out_n_ptl = 0;
689  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_LONG_LONG_INT, MPI_SUM, SML_COMM_WORLD);
690  return out_n_ptl;
691 #else
692  return (long long int)(n_ptl);
693 #endif
694  }
695 
697 #ifdef USE_MPI
698  int tmp_n_ptl = n_ptl;
699  int out_n_ptl = 0;
700  MPI_Allreduce(&tmp_n_ptl, &out_n_ptl, 1, MPI_INT, MPI_MAX, SML_COMM_WORLD);
701  return out_n_ptl;
702 #else
703  return n_ptl;
704 #endif
705  }
706 
707  // Gets the gyro_radius of a species based on equilibrium temperature
708  // inode is the LOCAL (poloidally decomposed) grid node index to get temperature
709  // smu_n is the normalized sqrt(mu)
710  // bfield is the magnetic field at inode
711  KOKKOS_INLINE_FUNCTION double get_f0_eq_gyro_radius(int inode, double smu_n, double bfield) const{
712  // Should replace UNIT_CHARGE*charge_eu with charge(?)
713  return smu_n*sqrt(mass*f0.temp_ev(inode)*EV_2_J) / (UNIT_CHARGE*charge_eu*bfield);
714  }
715 
716  // Gets the equilibrium thermal velocity of a species based on f0 temperature
717  // inode is the GLOBAL node index to get temperature
718  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const{
719  return thermal_velocity(mass, f0.temp_global(inode));
720  }
721 
722  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on device
723  // inode is the local node index to get temperature
724  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const{
725  return thermal_velocity(mass, f0.temp_ev(inode));
726  }
727 
728  // Gets the equilibrium thermal velocity of a species based on f0 temperature, on host
729  // inode is the local node index to get temperature
730  KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const{
731  return thermal_velocity(mass, f0.temp_ev_h(inode));
732  }
733 
734  // Get species velocity
735  KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid<DeviceType>& grid, const MagneticField<DeviceType>& magnetic_field, const DomainDecomposition<DeviceType>& pol_decomp, SimdParticles& part, Simd<double>& smu, Simd<double>& vp, Simd<int>& nearest_node, Simd<bool>& not_in_triangle, Simd<bool>& not_in_poloidal_domain) const{
736 
737  // This modulo surely doesnt need to be here (at least, should be elsewhere).
738  // Modulo phi coordinate
739  grid.wedge_modulo_phi(part.ph.phi);
740 
741  // Get the triangle and p weights
742  SimdGridVec p;
743  Simd<int> itr = -1;
744  grid.charge_search_index(magnetic_field, part.ph.v(), itr, p);
745 
746  // Complain if the particle is outside the grid
747  grid.check_triangle(itr,not_in_triangle);
748 
749  // Determine which of the three indices the particle is closest to.
750  Simd<int> ml;
751  p.maxloc(ml);
752 
753  // Calculate psi (can get this from charge_search_index)
754  Simd<double> psi;
755  magnetic_field.get_psi(part.ph.x(),psi);
756 
757  Simd<double> bmag;
758  magnetic_field.bmag_interpol(part.ph.v(), bmag);
759 
760  for (int i_simd = 0; i_simd<SIMD_SIZE; i_simd++){
761  if(not_in_triangle[i_simd]) continue;
762 
763  nearest_node[i_simd]=grid.get_node_index(itr[i_simd], ml[i_simd]) - pol_decomp.node_offset;
764  not_in_poloidal_domain[i_simd] = (nearest_node[i_simd]<0 || nearest_node[i_simd]>=pol_decomp.nnodes);
765 
766  double temp_ev_norm = not_in_poloidal_domain[i_simd] ? f0.temp_ev(0) : f0.temp_ev(nearest_node[i_simd]);
767 
768  // get vp and smu
769  const double& B = bmag[i_simd];
770  vp[i_simd] = normalized_v_para(c_m, mass, B, temp_ev_norm, part.ph.rho[i_simd]);
771  smu[i_simd] = normalized_sqrt_mu(B, temp_ev_norm, part.ct.mu[i_simd]);
772  }
773  }
774 };
775 
776 #include "species.tpp"
777 #endif
Cabana::AoSoA< PhaseDataTypes, HostType, VEC_LEN > phase0
Definition: species.hpp:111
bool stream_particles
Whether to stream particles between host and device if possible.
Definition: species.hpp:103
Definition: globals.hpp:78
KOKKOS_INLINE_FUNCTION VecPhase * ph0() const
Definition: species.hpp:592
KOKKOS_INLINE_FUNCTION int divide_and_round_up(int a, int b)
Definition: globals.hpp:173
bool owns_particles_d
Whether the species owns the device particle allocation right now.
Definition: species.hpp:100
void back_up_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:421
KOKKOS_INLINE_FUNCTION double normalized_v_para(double c_m, double mass, double B, double temp_ev, double rho)
Definition: basic_physics.hpp:51
KOKKOS_INLINE_FUNCTION double get_f0_eq_gyro_radius(int inode, double smu_n, double bfield) const
Definition: species.hpp:711
subroutine adjust_n_ptl_for_core_ptl(n_ptl)
Definition: load.F90:453
void set_spall_num_and_ptr(int idx, int n_ptl, int n_vecs, VecParticles *ptl)
bool is_rank_zero()
Definition: globals.hpp:27
void for_particle_range(int begin_idx, int end_idx, const std::string label, F lambda_func) const
Definition: species.hpp:483
constexpr double PROTON_MASS
Definition: constants.hpp:7
Distribution< Device > f0
Species distribution in velocity space on local mesh nodes.
Definition: species.hpp:119
MPI_Comm SML_COMM_WORLD
Definition: my_mpi.cpp:4
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > backup_particles
Copy of particles to be restored for RK2.
Definition: species.hpp:115
bool is_electron
Whether this species is the electrons.
Definition: species.hpp:78
void for_all_particles(const std::string label, F lambda_func, const PtlMvmt mvmt, LaunchBounds launch_bounds=LaunchBounds::Default)
Definition: species.hpp:534
void save_backup_particles()
Definition: species.hpp:613
double c2_2m
c2/2m
Definition: species.hpp:86
double rho[VEC_LEN]
Definition: particles.hpp:72
void copy_to_phase0(Species< Device > &species)
Definition: species.hpp:596
Definition: species.hpp:57
Simd< double > w1
Definition: particles.hpp:22
double c_m
c/m
Definition: species.hpp:85
Definition: species.hpp:56
constexpr double EV_2_J
Conversion rate ev to J.
Definition: constants.hpp:5
bool default_streaming_option()
Definition: species.hpp:27
Eq::Profile< Device > eq_den
Definition: species.hpp:124
Definition: globals.hpp:83
KOKKOS_INLINE_FUNCTION VecParticles * ptl() const
Definition: species.hpp:588
KOKKOS_INLINE_FUNCTION double thermal_velocity(double mass, double temp_ev)
Definition: basic_physics.hpp:29
Definition: NamelistReader.hpp:193
KinType kintype
Whether the species is gyrokinetic or drift kinetic.
Definition: species.hpp:81
Definition: magnetic_field.hpp:12
int add_vec_buffer(int n_ptl)
Definition: particles.hpp:164
int idx
Index in all_species.
Definition: species.hpp:77
Definition: particles.hpp:68
int a
The index in the inner array of the AoSoA.
Definition: particles.hpp:120
Definition: particles.hpp:85
KOKKOS_INLINE_FUNCTION void bmag_interpol(const SimdVector &v, Simd< double > &bmag) const
Definition: magnetic_field.tpp:257
bool particles_are_backed_up
Whether particles are currently backed up.
Definition: species.hpp:108
int nonadiabatic_idx
Index of species skipping adiabatic species (for compatibility with fortran arrays) ...
Definition: species.hpp:80
bool default_residence_option()
Definition: species.hpp:34
int n_ptl
Number of particles.
Definition: species.hpp:94
KOKKOS_INLINE_FUNCTION void get_particle_velocity_and_nearest_node(const Grid< DeviceType > &grid, const MagneticField< DeviceType > &magnetic_field, const DomainDecomposition< DeviceType > &pol_decomp, SimdParticles &part, Simd< double > &smu, Simd< double > &vp, Simd< int > &nearest_node, Simd< bool > &not_in_triangle, Simd< bool > &not_in_poloidal_domain) const
Definition: species.hpp:735
Definition: streamed_parallel_for.hpp:16
KOKKOS_INLINE_FUNCTION void maxloc(Simd< int > &ml) const
Definition: grid_structs.hpp:16
int node_offset
Offset of first mesh node belonging to this MPI rank.
Definition: domain_decomposition.hpp:41
Definition: streamed_parallel_for.hpp:14
void set_buffer_phase0_d()
Definition: species.hpp:389
long long int get_total_n_ptl()
Definition: species.hpp:685
void set_buffer_particles_d()
Definition: species.hpp:360
Simd< double > rho
Definition: particles.hpp:21
Definition: species.hpp:58
int p_range< DeviceType >(int num_particle)
Definition: particles.hpp:157
int eq_flow_type
Definition: species.hpp:126
double charge_eu
Particle charge in eu.
Definition: species.hpp:84
KOKKOS_INLINE_FUNCTION void get_psi(const SimdVector2D &x, Simd< double > &psi_out) const
Definition: magnetic_field.tpp:82
Definition: species.hpp:50
int nnodes
Number of nodes belonging to this MPI rank.
Definition: domain_decomposition.hpp:42
void resize_particles(int new_n_ptl)
Definition: species.hpp:222
double mass
Particle mass.
Definition: species.hpp:82
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity(int inode) const
Definition: species.hpp:718
Species(int idx_in, int nonadiabatic_idx_in, bool is_electron_in, bool is_adiabatic_in, KinType kintype_in, double mass_in, double charge_in, double charge_eu_in, bool is_deltaf_in, int ncycles_in)
Definition: species.tpp:23
void for_all_particles(const std::string label, F lambda_func) const
Definition: species.hpp:413
Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > particles_d
Particles on device.
Definition: species.hpp:98
Definition: species.hpp:47
double w2[VEC_LEN]
Definition: particles.hpp:74
#define TIMER(N, F)
Definition: timer_macro.hpp:24
RKRestorationMethod
Definition: species.hpp:67
idx
Definition: diag_f0_df_port1.hpp:32
void copy_particles_to_device_if_not_resident()
Definition: species.hpp:343
RKRestorationMethod RK_restoration_method
Currently, electrons must use first method and ions must use second.
Definition: species.hpp:106
Simd< double > r
Definition: particles.hpp:18
KOKKOS_INLINE_FUNCTION SimdVector2D & x()
Definition: particles.hpp:27
void resize_host_particles_to_match_device()
Definition: species.hpp:242
Definition: species.hpp:68
KOKKOS_INLINE_FUNCTION SimdVector & v()
Definition: particles.hpp:39
ReturnOpt return_opt
Definition: species.hpp:62
KOKKOS_INLINE_FUNCTION int get_node_index(int triangle_index, int tri_vertex_index) const
Definition: grid.tpp:792
ReturnOpt
Definition: species.hpp:55
Option
Definition: streamed_parallel_for.hpp:13
void restore_particles_from_backup()
Definition: species.hpp:634
Definition: globals.hpp:84
static std::vector< MemoryPrediction > estimate_memory_usage(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int species_idx)
Definition: species.tpp:58
SendOpt send_opt
Definition: species.hpp:61
double charge
Particle charge.
Definition: species.hpp:83
SimdPhase ph
Definition: particles.hpp:59
void copy_particles_from_device()
Definition: species.hpp:314
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode(int inode) const
Definition: species.hpp:724
void copy_particles_from_device_if_not_resident()
Definition: species.hpp:349
KOKKOS_INLINE_FUNCTION void wedge_modulo_phi(Simd< double > &phi_mod) const
Definition: grid.tpp:673
void unassign_host_particles()
Definition: species.hpp:259
int ncycles_between_sorts
Number of subcycles between sorts.
Definition: species.hpp:91
Definition: particles.hpp:58
Cabana::AoSoA< PhaseDataTypes, Device, VEC_LEN > phase0_d
Definition: species.hpp:112
int SML_COMM_RANK
Definition: my_mpi.cpp:5
KinType
Definition: globals.hpp:82
Species(SpeciesType sp_type, int n_ptl)
Definition: species.hpp:138
bool is_deltaf
Whether this species is deltaf.
Definition: species.hpp:88
VecPhase ph
Definition: particles.hpp:86
Definition: grid_structs.hpp:7
Definition: species.hpp:43
Definition: species.hpp:69
void set_min_max_num(int isp, int n_ptl)
int minimum_ptl_reservation
The minimum reservation size for particles.
Definition: species.hpp:93
int s
The index in the outer array of the AoSoA.
Definition: particles.hpp:119
KOKKOS_INLINE_FUNCTION double normalized_sqrt_mu(double B, double temp_ev, double mu)
Definition: basic_physics.hpp:61
Simd< double > z
Definition: particles.hpp:19
void copy_particles_to_device_if_resident()
Definition: species.hpp:331
Definition: species.hpp:51
void resize_device_particles(int new_n_ptl)
Definition: species.hpp:280
Definition: species.hpp:52
constexpr double UNIT_CHARGE
Charge of an electron (C)
Definition: constants.hpp:4
KOKKOS_INLINE_FUNCTION double get_f0_eq_thermal_velocity_lnode_h(int inode) const
Definition: species.hpp:730
void exit_XGC(std::string msg)
Definition: globals.hpp:37
void copy_particles_from_device_if_resident()
Definition: species.hpp:337
bool is_adiabatic
Whether this species is adiabatic.
Definition: species.hpp:79
Simd< double > phi
Definition: particles.hpp:20
Definition: magnetic_field.F90:1
static constexpr const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t Async
Definition: space_settings.hpp:82
int n_backup_particles
Definition: species.hpp:116
Eq::Profile< Device > eq_flow
Definition: species.hpp:125
Definition: streamed_parallel_for.hpp:15
SendOpt
Definition: species.hpp:49
SimdConstants ct
Definition: particles.hpp:60
void copy_particles_to_device()
Definition: species.hpp:294
KOKKOS_INLINE_FUNCTION void restore_phase_from_phase0(const AoSoAIndices< Device > &inds, SimdParticles &part_one) const
Definition: species.hpp:672
Species(int n_ptl_in)
Definition: species.hpp:168
double phi[VEC_LEN]
Definition: particles.hpp:71
Simd< double > w2
Definition: particles.hpp:23
double r[VEC_LEN]
Definition: particles.hpp:69
GyroAverageMatrices< HostType > gyro_avg_matrices
Definition: species.hpp:128
Definition: species.hpp:74
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Definition: species.hpp:43
Eq::Profile< Device > eq_temp
Definition: species.hpp:123
bool particles_resident_on_device
Whether the particles can reside on device.
Definition: species.hpp:102
Simd< double > mu
Definition: particles.hpp:52
PtlMvmt(SendOpt send_opt, ReturnOpt return_opt)
Definition: species.hpp:64
static int get_initial_n_ptl(NLReader::NamelistReader &nlr, const Grid< DeviceType > &grid, const DomainDecomposition< DeviceType > &pol_decomp, int sml_special, int species_idx, bool verbose)
Definition: species.tpp:115
int ncycles
Number of subcycles.
Definition: species.hpp:90
Definition: profile.hpp:171
int collision_grid_index
Which collision grid to use.
Definition: species.hpp:121
Definition: particles.hpp:118
SpeciesType
Definition: globals.hpp:77
double z[VEC_LEN]
Definition: particles.hpp:70
void restore_backup_SoA(Cabana::AoSoA< ParticleDataTypes, Device, VEC_LEN > &backup_SoA, int offset, int n) const
Definition: species.hpp:450
Definition: distribution.hpp:10
KOKKOS_INLINE_FUNCTION void check_triangle(const Simd< int > &itr, Simd< bool > &wasnt_in_triangle) const
Definition: grid.tpp:692
void resize_device_particles()
Definition: species.hpp:265
int get_max_n_ptl()
Definition: species.hpp:696
LaunchBounds
Definition: species.hpp:402
KOKKOS_INLINE_FUNCTION void charge_search_index(const MagneticField< Device > &magnetic_field, const SimdVector2D &x, const Simd< double > &phi, const Simd< double > &psi, SimdVector2D &xff, Simd< int > &itr, SimdGridVec &p) const
Definition: grid.tpp:591
Cabana::AoSoA< ParticleDataTypes, HostType, VEC_LEN > particles
Particles.
Definition: species.hpp:95
double w1[VEC_LEN]
Definition: particles.hpp:73
Definition: species.hpp:43