XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
access_add.hpp
Go to the documentation of this file.
1 #ifndef ACCESS_ADD_HPP
2 #define ACCESS_ADD_HPP
3 #include "space_settings.hpp"
4 
5 /* ScatterType template specifies whether the class uses atomics or array replication
   * for its scatter-add updates.
   * NOTE(review): this listing jumps from source line 8 to source line 10, so an
   * enumerator (presumably the array-replication one) appears to be missing here --
   * confirm against the original header.
6  * */
7 enum class ScatterType{
8  Atomic,
10 };
11 
12 #ifdef USE_ARRAY_REPLICATION
14 #else
16 #endif
17 
18 // returns omp thread if on CPU (using array replication strategy for scatter) or
19 // returns 0 on GPU (uses atomics, no replication needed)
20 KOKKOS_INLINE_FUNCTION int get_thread(){
21 #if defined(USE_ARRAY_REPLICATION) && defined(USE_OMP)
22  return omp_get_thread_num();
23 #else
24  return 0;
25 #endif
26 }
27 
28 // access_add (could use scatter view instead): Uses atomic if array replication is off
29 template<typename T>
30 KOKKOS_INLINE_FUNCTION void access_add(T* addr, T val){
31 #ifdef USE_ARRAY_REPLICATION
32  *addr += val;
33 #else
34  Kokkos::atomic_add(addr, val);
35 #endif
36 }
37 
/* Sums the contents of a View into the 0th index of its first dimension if USE_ARRAY_REPLICATION is on.
 * When USE_ARRAY_REPLICATION is not defined the function does nothing.
 *
 * view: object providing extent(0), size() and data(). Its first extent is taken as the
 *       number of replicas, and the data is walked as extent(0) consecutive chunks of
 *       size()/extent(0) elements each.
 *       NOTE(review): this assumes the storage is contiguous with the first dimension
 *       slowest-varying, and that data() is host-accessible -- confirm against callers.
 */
template<typename T>
void reduce_replicated_array(T& view){
#ifdef USE_ARRAY_REPLICATION
  const int n_threads = view.extent(0);

  // With fewer than two replicas there is nothing to fold into replica 0; returning here
  // also avoids dividing by a zero extent below.
  if(n_threads < 2) return;

  const int size_per_thread = view.size()/n_threads;

  auto thread_0_ptr = view.data();
  auto thread_i_ptr = view.data();

  // Add replica i element-wise into replica 0, one parallel sweep per replica.
  for(int i = 1; i<n_threads; i++){
    thread_i_ptr += size_per_thread; // advance to the start of replica i
    Kokkos::parallel_for("reduce_replicated_array", Kokkos::RangePolicy<HostExSpace>( 0, size_per_thread), KOKKOS_LAMBDA(const int idx){
      thread_0_ptr[idx] += thread_i_ptr[idx];
    });
  }
#endif
}
57 
58 #endif
KOKKOS_INLINE_FUNCTION int get_thread()
Definition: access_add.hpp:20
idx
Definition: diag_f0_df_port1.hpp:32
KOKKOS_INLINE_FUNCTION void access_add(T *addr, T val)
Definition: access_add.hpp:30
ScatterType
Definition: access_add.hpp:7
void reduce_replicated_array(T &view)
Definition: access_add.hpp:41
constexpr ScatterType SCATTER_TYPE_GLOBAL
Definition: access_add.hpp:15
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252