XGC1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
transpose_particles.hpp
Go to the documentation of this file.
1 #ifndef TRANSPOSE_PARTICLES_HPP
2 #define TRANSPOSE_PARTICLES_HPP
3 
4 #include "space_settings.hpp"
5 
6 template<class DataType>
7 void transpose_particles_from_AoS_to_AoSoA(Cabana::AoSoA<DataType,DeviceType,VEC_LEN>& local_particles, int ioffset, int n_vec){
8  // Early return if VEC_LEN=1 since no transposition needed
9  if (VEC_LEN == 1) return;
10 
11  const std::size_t PTL_N_DBL = sizeof( Cabana::Tuple<DataType>)/8;
12 
13  // Pointer to local particles
14  VecParticlesSimple<PTL_N_DBL>* vptl = (VecParticlesSimple<PTL_N_DBL>*)local_particles.data() + ioffset;
15 
16 #ifdef USE_GPU
17  int team_size = VEC_LEN;
18 #else
19  int team_size = 1;
20 #endif
21  int league_size = n_vec;
22 
23  typedef Kokkos::View<double[VEC_LEN*PTL_N_DBL],ExSpace::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> PtlVec;
24  size_t shmem_size = PtlVec::shmem_size(); // Could halve this, but may worsen memory access
25  Kokkos::parallel_for("transpose_particles_from_AoS_to_AoSoA",
26  Kokkos::TeamPolicy<ExSpace> (league_size, team_size).set_scratch_size(KOKKOS_TEAM_SCRATCH_OPT,Kokkos::PerTeam(shmem_size)),
27  KOKKOS_LAMBDA (Kokkos::TeamPolicy<ExSpace>::member_type team_member){
28  // Locally shared: global index in population
29  PtlVec shmem_ptl_vec(team_member.team_scratch(KOKKOS_TEAM_SCRATCH_OPT));
30 
31  int ivec = team_member.league_rank(); // vector assigned to this team
32  int ith = team_member.team_rank(); // This thread's rank in the team
33 
34  // Copy vector to shared memory
35  for(int idbl = ith; idbl<VEC_LEN*PTL_N_DBL; idbl+=team_size){ // Loop through doubles
36  shmem_ptl_vec(idbl) = vptl[ivec].data[idbl];
37  }
38 
39  // Write transposed particles back to particle array
40  for(int iptl = ith; iptl<VEC_LEN; iptl+=team_size){ // Loop through particles
41  for(int iprop = 0; iprop<PTL_N_DBL; iprop++){ // Loop through properties
42  vptl[ivec].data[iprop*VEC_LEN + iptl] = shmem_ptl_vec(iptl*PTL_N_DBL + iprop);
43  }
44  }
45  });
46 
47  Kokkos::fence();
48 }
49 
50 template<class DataType>
51 void transpose_particles_from_AoSoA_to_AoS(Cabana::AoSoA<DataType,DeviceType,VEC_LEN>& local_particles, int ioffset, int n_vec){
52  // Early return if VEC_LEN=1 since no transposition needed
53  if (VEC_LEN == 1) return;
54 
55  const std::size_t PTL_N_DBL = sizeof( Cabana::Tuple<DataType>)/8;
56 
57  // Pointer to local particles
58  VecParticlesSimple<PTL_N_DBL>* vptl = (VecParticlesSimple<PTL_N_DBL>*)local_particles.data() + ioffset;
59 
60 #ifdef USE_GPU
61  int team_size = VEC_LEN;
62 #else
63  int team_size = 1;
64 #endif
65  int league_size = n_vec;
66 
67  typedef Kokkos::View<double[VEC_LEN*PTL_N_DBL],ExSpace::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>> PtlVec;
68  size_t shmem_size = PtlVec::shmem_size(); // Could halve this, but may worsen memory access
69  Kokkos::parallel_for("transpose_particles_from_AoS_to_AoSoA",
70  Kokkos::TeamPolicy<ExSpace> (league_size, team_size).set_scratch_size(KOKKOS_TEAM_SCRATCH_OPT,Kokkos::PerTeam(shmem_size)),
71  KOKKOS_LAMBDA (Kokkos::TeamPolicy<ExSpace>::member_type team_member){
72  // Locally shared: global index in population
73  PtlVec shmem_ptl_vec(team_member.team_scratch(KOKKOS_TEAM_SCRATCH_OPT));
74 
75  int ivec = team_member.league_rank(); // vector assigned to this team
76  int ith = team_member.team_rank(); // This thread's rank in the team
77 
78  // Transpose into shared memory
79  for(int iptl = ith; iptl<VEC_LEN; iptl+=team_size){ // Loop through particles
80  for(int iprop = 0; iprop<PTL_N_DBL; iprop++){ // Loop through properties
81  shmem_ptl_vec(iptl*PTL_N_DBL + iprop) = vptl[ivec].data[iprop*VEC_LEN + iptl];
82  }
83  }
84 
85  // Copy transposed particles back to particle array
86  for(int idbl = ith; idbl<VEC_LEN*PTL_N_DBL; idbl+=team_size){ // Loop through doubles
87  vptl[ivec].data[idbl] = shmem_ptl_vec(idbl);
88  }
89  });
90 
91  Kokkos::fence();
92 }
93 
94 #endif
Definition: particles.hpp:134
double data[T *VEC_LEN]
Definition: particles.hpp:135
void transpose_particles_from_AoSoA_to_AoS(Cabana::AoSoA< DataType, DeviceType, VEC_LEN > &local_particles, int ioffset, int n_vec)
Definition: transpose_particles.hpp:51
void transpose_particles_from_AoS_to_AoSoA(Cabana::AoSoA< DataType, DeviceType, VEC_LEN > &local_particles, int ioffset, int n_vec)
Definition: transpose_particles.hpp:7
#define KOKKOS_TEAM_SCRATCH_OPT
Definition: space_settings.hpp:77
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252