XGC1
transpose_particles.hpp
Go to the documentation of this file.
1 #ifndef TRANSPOSE_PARTICLES_HPP
2 #define TRANSPOSE_PARTICLES_HPP
3 
4 #include "space_settings.hpp"
5 
6 template<class DataType>
8 
9  // Early return if VEC_LEN=1 since no transposition needed
10  if (VEC_LEN == 1) return;
11 
12  const std::size_t PTL_N_DBL = Particles::sizeof_ptl<DataType>()/8;
13 
14  // Pointer to local particles
16 
17 #ifdef USE_GPU
18  int team_size = VEC_LEN;
19 #else
20  int team_size = 1;
21 #endif
22  int league_size = n_vec;
23 
24  typedef Kokkos::View<double[VEC_LEN*PTL_N_DBL],ExSpace::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> PtlVec;
25  size_t shmem_size = PtlVec::shmem_size(); // Could halve this, but may worsen memory access
26  Kokkos::parallel_for("transpose_particles_from_AoS_to_AoSoA",
27  Kokkos::TeamPolicy<ExSpace> (league_size, team_size).set_scratch_size(KOKKOS_TEAM_SCRATCH_OPT,Kokkos::PerTeam(shmem_size)),
28  KOKKOS_LAMBDA (Kokkos::TeamPolicy<ExSpace>::member_type team_member){
29  // Locally shared: global index in population
30  PtlVec shmem_ptl_vec(team_member.team_scratch(KOKKOS_TEAM_SCRATCH_OPT));
31 
32  int ivec = team_member.league_rank(); // vector assigned to this team
33  int ith = team_member.team_rank(); // This thread's rank in the team
34 
35  // Copy vector to shared memory
36  for(int idbl = ith; idbl<VEC_LEN*PTL_N_DBL; idbl+=team_size){ // Loop through doubles
37  shmem_ptl_vec(idbl) = vptl[ivec].data[idbl];
38  }
39 
40  // Write transposed particles back to particle array
41  for(int iptl = ith; iptl<VEC_LEN; iptl+=team_size){ // Loop through particles
42  for(int iprop = 0; iprop<PTL_N_DBL; iprop++){ // Loop through properties
43  vptl[ivec].data[iprop*VEC_LEN + iptl] = shmem_ptl_vec(iptl*PTL_N_DBL + iprop);
44  }
45  }
46  });
47 
48  Kokkos::fence();
49 }
50 
51 template<class DataType>
53  // Early return if VEC_LEN=1 since no transposition needed
54  if (VEC_LEN == 1) return;
55 
56  const std::size_t PTL_N_DBL = Particles::sizeof_ptl<DataType>()/8;
57 
58  // Pointer to local particles
60 
61 #ifdef USE_GPU
62  int team_size = VEC_LEN;
63 #else
64  int team_size = 1;
65 #endif
66  int league_size = n_vec;
67 
68  typedef Kokkos::View<double[VEC_LEN*PTL_N_DBL],ExSpace::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> PtlVec;
69  size_t shmem_size = PtlVec::shmem_size(); // Could halve this, but may worsen memory access
70  Kokkos::parallel_for("transpose_particles_from_AoS_to_AoSoA",
71  Kokkos::TeamPolicy<ExSpace> (league_size, team_size).set_scratch_size(KOKKOS_TEAM_SCRATCH_OPT,Kokkos::PerTeam(shmem_size)),
72  KOKKOS_LAMBDA (Kokkos::TeamPolicy<ExSpace>::member_type team_member){
73  // Locally shared: global index in population
74  PtlVec shmem_ptl_vec(team_member.team_scratch(KOKKOS_TEAM_SCRATCH_OPT));
75 
76  int ivec = team_member.league_rank(); // vector assigned to this team
77  int ith = team_member.team_rank(); // This thread's rank in the team
78 
79  // Transpose into shared memory
80  for(int iptl = ith; iptl<VEC_LEN; iptl+=team_size){ // Loop through particles
81  for(int iprop = 0; iprop<PTL_N_DBL; iprop++){ // Loop through properties
82  shmem_ptl_vec(iptl*PTL_N_DBL + iprop) = vptl[ivec].data[iprop*VEC_LEN + iptl];
83  }
84  }
85 
86  // Copy transposed particles back to particle array
87  for(int idbl = ith; idbl<VEC_LEN*PTL_N_DBL; idbl+=team_size){ // Loop through doubles
88  vptl[ivec].data[idbl] = shmem_ptl_vec(idbl);
89  }
90  });
91 
92  Kokkos::fence();
93 }
94 
95 #endif
Definition: particles.hpp:248
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
type(ptl_type), dimension(:), allocatable, target local_particles
Definition: resamp_mod.F90:150
#define KOKKOS_TEAM_SCRATCH_OPT
Definition: space_settings.hpp:77
Definition: particles.hpp:123
double data[T *VEC_LEN]
Definition: particles.hpp:124
void transpose_particles_from_AoS_to_AoSoA(Particles::Array< DataType, DeviceType > &local_particles, int ioffset, int n_vec)
Definition: transpose_particles.hpp:7
void transpose_particles_from_AoSoA_to_AoS(Particles::Array< DataType, DeviceType > &local_particles, int ioffset, int n_vec)
Definition: transpose_particles.hpp:52