13 # ifdef USE_ARRAY_REPLICATION
14 # error Cannot use ARRAY REPLICATION on GPU
17 # ifndef USE_ARRAY_REPLICATION
18 # error Must use ARRAY REPLICATION if executing on CPU for now
22 #ifdef USE_ARRAY_REPLICATION
31 #if defined(USE_ARRAY_REPLICATION) && defined(USE_OMP)
32 return omp_get_thread_num();
41 #ifdef USE_ARRAY_REPLICATION
44 Kokkos::atomic_add(addr, val);
52 #ifdef USE_ARRAY_REPLICATION
53 int n_threads = view.extent(0);
54 int size_per_thread = view.size()/n_threads;
56 auto thread_0_ptr = view.data();
57 auto thread_i_ptr = view.data();
59 for(
int i = 1; i<n_threads; i++){
60 thread_i_ptr += size_per_thread;
61 Kokkos::parallel_for(
"reduce_replicated_array", Kokkos::RangePolicy<HostExSpace>( 0, size_per_thread), KOKKOS_LAMBDA(
const int idx){
62 thread_0_ptr[idx] += thread_i_ptr[idx];
KOKKOS_INLINE_FUNCTION int get_thread()
Definition: access_add.hpp:30
KOKKOS_INLINE_FUNCTION void access_add(T *addr, T val)
Definition: access_add.hpp:40
ScatterType
Definition: access_add.hpp:7
void reduce_replicated_array(T &view)
Definition: access_add.hpp:51
constexpr ScatterType SCATTER_TYPE_GLOBAL
Definition: access_add.hpp:25
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252