XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
matrix.hpp
Go to the documentation of this file.
1 #ifndef MATRIX_HPP
2 #define MATRIX_HPP
3 
4 #include "space_settings.hpp"
5 #include "array_deep_copy.hpp"
6 
7 /* The Matrix class stores a sparse matrix. It is initialized in (?) format but can be converted to CSR (Compressed Sparse Row) with the convert_org_to_csr
8  * method.
9  * */
10 template<class Device>
11 class Matrix{
12 
13  using exspace = typename Device::execution_space;
14 
17  Add=1
18  };
19 
20  int m;
21  int n;
22  int width; //< max of non-zero element of each row
23  Kokkos::View<double**,Kokkos::LayoutRight, Device> value;
24  Kokkos::View<int**,Kokkos::LayoutRight, Device> eindex;
25  Kokkos::View<int*,Kokkos::LayoutRight, Device> nelement;
26 
27  // use compressed sparse row matrix data structure
28  bool is_csr;
29  int nnz;
30  Kokkos::View<int*,Kokkos::LayoutRight, Device> csr_ridx;
31  Kokkos::View<double*,Kokkos::LayoutRight, Device> csr_v;
32  Kokkos::View<int*,Kokkos::LayoutRight, Device> csr_cidx;
33 
34  public:
35 
36  Matrix(){}
37 
45  Matrix(int m_in,int n_in,int w_in)
46  : m(m_in),
47  n(n_in),
48  width(w_in),
49  value("value",m_in,w_in), // init to zero
50  eindex(Kokkos::ViewAllocateWithoutInitializing("eindex"), m_in,w_in),
51  nelement("nelement",m_in), // init to zero
52  is_csr(false) // start from fixed width matrix format
53  {}
54 
67  Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int* csr_ridx_or_eindex, int* csr_cidx_or_nelement, double* values)
68  : m(m_in),
69  n(n_in),
70  width(w_in),
71  nnz(nnz_in),
72  is_csr(is_csr_in)
73  {
74  if(is_csr){
75  csr_ridx = Kokkos::View<int*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_ridx"),m+1);
76  csr_v = Kokkos::View<double*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_v"),nnz);
77  csr_cidx = Kokkos::View<int*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_cidx"),nnz);
78  array_deep_copy(csr_ridx, csr_ridx_or_eindex);
79  array_deep_copy(csr_v, values);
80  array_deep_copy(csr_cidx, csr_cidx_or_nelement);
81  } else {
82  value = Kokkos::View<double**,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("value"), m,width);
83  eindex = Kokkos::View<int**,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("eindex"), m,width);
84  nelement = Kokkos::View<int*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("nelement"), m);
85  array_deep_copy(eindex, csr_ridx_or_eindex);
86  array_deep_copy(value, values);
87  array_deep_copy(nelement, csr_cidx_or_nelement);
88  }
89  }
90 
91 /* NOT READY YET - NEEDS TO CHECK CPU V GPU
92  **
93  * Converts matrix from original format to CSR.
94  *
95  * @return void
96  *
97  void convert_org_to_csr(){
98  if(is_csr){
99  printf("\nwarning:matrix is csr already\n");
100  return;
101  }
102 
103  //counting nonzero elements
104  is_csr=true;
105 
106  nnz = 0;
107  for (int i=0; i<m; i++){
108  nnz+=nelement(i);
109  }
110 
111  //allocate memory -- 0-based index for C++ compatiblity
112  csr_ridx = Kokkos::View<int*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_ridx"),m+1);
113  csr_v = Kokkos::View<double*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_v"),nnz);
114  csr_cidx = Kokkos::View<int*,Kokkos::LayoutRight, Device>(Kokkos::ViewAllocateWithoutInitializing("csr_cidx"),nnz);
115 
116  //sorting of eindex is required if element searching is required.
117  //currently unsorted -- maybe soring can speed up matrix multiplication?
118 
119  //assign values
120  csr_ridx(0)=0; // start from 0 - 0 based
121  for (int i=0; i<m; i++){
122  csr_ridx(i+1)=csr_ridx(i)+nelement(i);
123  for (int j=0; j<nelement(i); j++){
124  int loc=csr_ridx(i)+j;
125  csr_cidx(loc)=eindex(i,j);
126  csr_v(loc)=value(i,j);
127  }
128  }
129 
130  //deallocate org data structure
131  value = Kokkos::View<double**,Kokkos::LayoutRight, Device>;
132  eindex = Kokkos::View<int**,Kokkos::LayoutRight, Device>;
133  nelement = Kokkos::View<int*,Kokkos::LayoutRight, Device>;
134  }
135 
136  // i row index
137  // j column index
138  // value_in is value
139  // flag is whether to replace or add
140  **
141  * Set value of matrix component. Inlined since this will presumably be called a lot in a loop
142  *
143  * @param[in] i is the row index
144  * @param[in] j is the column index
145  * @param[in] value_in is the value to set
146  * @param[in] flag is whether to add or replace if there is an existing value in the chosen matrix element
147  * @return void
148  *
149  inline void set_value(int i, int j, double value_in, SetValueOpt flag){
150  if(is_csr){
151  exit_XGC("\n Error: set_value is only allowed when matrix is not in csr format");
152  }
153 
154  // error check
155  if(i>=m || j>=n || i<0 || j<0){
156  exit_XGC("\n Error: Out of bounds access in set_value");
157  }
158 
159  // check same node
160  for (int l=0; l<nelement(i); l++){
161  // redundant point
162  if(j==eindex(i,l)){
163  if(flag==Replace){
164  value(i,l) = value_in;
165  } else {
166  value(i,l) += value_in;
167  }
168  return; //DONE already -- exit the function
169  }
170  }
171 
172  // new point
173  nelement(i) += 1;
174  int l=nelement(i);
175 
176  // Check l is within bounds
177  if(l < width){
178  printf("Error: Not enough memory space for matrix.");
179  printf("Increase width. %d %d %d",width,nelement(i),i;
180  exit_XGC("\n Error in set_value");
181  }
182 
183  eindex(i,l)=j;
184  value(i,l)=value;
185  }
186 */
187 // private: // Can't be private because there is this cuda error:
188 // The enclosing parent function for an extended __host__ __device__ lambda cannot have private or protected access within its class
189 // There should be some workarounds available if this is a problem.
190 
198  void mult_org(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x, const Kokkos::View<double*,Kokkos::LayoutRight,Device>& y) const{
199  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
200  Kokkos::parallel_for("mult_org", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
201  y(i)=0.0;
202  for (int j=0; j<mat.nelement(i); j++){
203  y(i) += mat.value(i,j)*x(mat.eindex(i,j)-1);
204  }
205  });
206  }
207 
215  void mult_csr(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x, const Kokkos::View<double*,Kokkos::LayoutRight,Device>& y) const{
216  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
217  Kokkos::parallel_for("mult_csr", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
218  y(i)=0.0;
219  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
220  y(i) += mat.csr_v(j)*x(mat.csr_cidx(j));
221  }
222  });
223  }
224 
225 /* Not ready - may need atomics
226  **
227  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in original data format.
228  *
229  * @param[in] x is the input vector
230  * @param[out] y is the result
231  * @return void
232  *
233  void transpose_mult_org(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x, Kokkos::View<double*,Kokkos::LayoutRight,Device>& y){
234  Kokkos::deep_copy(y,0.0);
235  for (i=0; i<m; i++){
236  for (j=0; j<nelement(i); j++){
237  int k=eindex(i,j)-1;
238  y(k) += value(i,j)*x(i);
239  }
240  });
241  }
242 
243  **
244  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in CSR format
245  *
246  * @param[in] x is the input vector
247  * @param[out] y is the result
248  * @return void
249  *
250  void transpose_mult_csr(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x, Kokkos::View<double*,Kokkos::LayoutRight,Device>& y){
251  Kokkos::deep_copy(y,0.0);
252  for (i=0; i<m; i++){
253  for (j=csr_ridx(i); j<csr_ridx(i+1); j++){
254  int k=csr_cidx(j);
255  y(k) += csr_v(j)*x(i);
256  }
257  });
258  }
259 */
260 
268  void mult_tensor_org(const Kokkos::View<double**,Kokkos::LayoutRight,Device>& x, const Kokkos::View<double**,Kokkos::LayoutRight,Device>& y) const{
269  int nv = x.extent(1); // width of tensor
270  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
271  Kokkos::parallel_for("mult_tensor_org", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
272  int k = idx%nv;
273  int i = idx/nv;
274  y(i,k)=0.0;
275  for (int j=0; j<=mat.nelement(i); j++){
276  y(i,k) += mat.value(i,j)*x(mat.eindex(i,j)-1,k);
277  }
278  });
279  }
280 
288  void mult_tensor_csr(const Kokkos::View<double**,Kokkos::LayoutRight,Device>& x, const Kokkos::View<double**,Kokkos::LayoutRight,Device>& y) const{
289  int nv = x.extent(1); // width of tensor
290  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
291  Kokkos::parallel_for("mult_tensor_csr", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
292  int k = idx%nv;
293  int i = idx/nv;
294  y(i,k)=0.0;
295  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
296  y(i,k) += mat.csr_v(j)*x(mat.csr_cidx(j),k);
297  }
298  });
299  }
300 
301  public:
302 
310  void mult(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x,const Kokkos::View<double*,Kokkos::LayoutRight,Device>& y) const{
311  if(is_csr){
312  mult_csr(x,y);
313  } else {
314  mult_org(x,y);
315  }
316  }
317 
325 /* void transpose_mult(const Kokkos::View<double*,Kokkos::LayoutRight,Device>& x, Kokkos::View<double*,Kokkos::LayoutRight,Device>& y){
326  if(is_csr){
327  transpose_mult_csr(x,y);
328  } else {
329  transpose_mult_org(x,y);
330  }
331  }*/
332 
340  void mult_tensor(const Kokkos::View<double**,Kokkos::LayoutRight,Device>& x, const Kokkos::View<double**,Kokkos::LayoutRight,Device>& y) const{
341  if(is_csr){
342  mult_tensor_csr(x,y);
343  } else {
344  mult_tensor_org(x,y);
345  }
346  }
347 };
348 
349 #endif
void array_deep_copy(T *array, const Kokkos::View< T *, Kokkos::LayoutRight, Device > &view)
Definition: array_deep_copy.hpp:11
Kokkos::View< int *, Kokkos::LayoutRight, Device > nelement
of non-zero element of each row
Definition: matrix.hpp:25
void mult_org(const Kokkos::View< double *, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:198
void mult_tensor_org(const Kokkos::View< double **, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:268
void mult_csr(const Kokkos::View< double *, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:215
Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int *csr_ridx_or_eindex, int *csr_cidx_or_nelement, double *values)
Definition: matrix.hpp:67
int nnz
if in CSR format, number of (nonzero) values
Definition: matrix.hpp:29
Kokkos::View< int *, Kokkos::LayoutRight, Device > csr_cidx
columun index - nnz
Definition: matrix.hpp:32
Definition: matrix.hpp:16
void mult(const Kokkos::View< double *, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:310
typename Device::execution_space exspace
Use execution space where matrix views are allocated.
Definition: matrix.hpp:13
Definition: matrix.hpp:17
int n
of columns (size of each row)
Definition: matrix.hpp:21
idx
Definition: diag_f0_df_port1.hpp:32
int m
of rows (size of each column)
Definition: matrix.hpp:20
int width
Definition: matrix.hpp:22
Definition: matrix.hpp:11
bool is_csr
Whether the matrix is in CSR format.
Definition: matrix.hpp:28
Kokkos::View< int **, Kokkos::LayoutRight, Device > eindex
column index - 1-indexed!!
Definition: matrix.hpp:24
Matrix()
Definition: matrix.hpp:36
Matrix(int m_in, int n_in, int w_in)
Definition: matrix.hpp:45
SetValueOpt
Definition: matrix.hpp:15
Kokkos::View< double *, Kokkos::LayoutRight, Device > csr_v
value of CSR - nnz
Definition: matrix.hpp:31
Kokkos::View< int *, Kokkos::LayoutRight, Device > csr_ridx
row index for CSR - m+1
Definition: matrix.hpp:30
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
void mult_tensor(const Kokkos::View< double **, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:340
void mult_tensor_csr(const Kokkos::View< double **, Kokkos::LayoutRight, Device > &x, const Kokkos::View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:288
Kokkos::View< double **, Kokkos::LayoutRight, Device > value
matrix value
Definition: matrix.hpp:23