XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix.hpp
Go to the documentation of this file.
1 #ifndef MATRIX_HPP
2 #define MATRIX_HPP
3 
4 #include "space_settings.hpp"
5 #include "array_deep_copy.hpp"
6 #include "my_mirror_view.hpp"
7 #include "checkpoint.hpp"
8 
9 /* The Matrix class stores a sparse matrix. It is initialized in a fixed-width per-row ("org") format
10  * (each row holds up to `width` value/column-index pairs) but can be converted to CSR (Compressed
11  * Sparse Row) with the convert_org_to_csr method.
12  * */
12 template<class Device>
13 class Matrix{
14 
15  using exspace = typename Device::execution_space;
16  public: // temporarily public, for testing
19  Add=1
20  };
21 
22  enum Type{
24  };
25 
26  int m;
27  int n;
28  int width; //< max of non-zero element of each row
29  View<double**,CLayout, Device> value;
30  View<int**,CLayout, Device> eindex;
31  View<int*,CLayout, Device> nelement;
32 
33  // use compressed sparse row matrix data structure
34  bool is_csr;
35  int nnz;
36  View<int*,CLayout, Device> csr_ridx;
37  View<double*,CLayout, Device> csr_v;
38  View<int*,CLayout, Device> csr_cidx;
39 
40  public:
41 
42  Matrix(){}
43 
51  Matrix(int m_in,int n_in,int w_in)
52  : m(m_in),
53  n(n_in),
54  width(w_in),
55  value("value",m_in,w_in), // init to zero
56  eindex(NoInit("eindex"), m_in,w_in),
57  nelement("nelement",m_in), // init to zero
58  is_csr(false) // start from fixed width matrix format
59  {
60  Kokkos::deep_copy(eindex, -1); // For debugging; no initialization should be necessary
61  }
62 
67  Matrix(Type matrix_type, int m_in)
68  : m(m_in),
69  n(m_in),
70  width(1),
71  value("value",m,width), // init to zero
72  eindex(NoInit("eindex"),m,width),
73  nelement("nelement",m), // init to zero
74  is_csr(false) // start from fixed width matrix format
75  {
76  for(int i=0; i<m; i++){
77  set_value(i,i,1.0,SetValueOpt::Replace);
78  }
79 
80  // Convert to CSR matrix to save memory
82  }
83 
96  Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int* csr_ridx_or_eindex, int* csr_cidx_or_nelement, double* values)
97  : m(m_in),
98  n(n_in),
99  width(w_in),
100  nnz(nnz_in),
101  is_csr(is_csr_in)
102  {
103  if(is_csr){
104  csr_ridx = View<int*,CLayout, Device>(NoInit("csr_ridx"),m+1);
105  csr_v = View<double*,CLayout, Device>(NoInit("csr_v"),nnz);
106  csr_cidx = View<int*,CLayout, Device>(NoInit("csr_cidx"),nnz);
107  array_deep_copy(csr_ridx, csr_ridx_or_eindex);
108  array_deep_copy(csr_v, values);
109  array_deep_copy(csr_cidx, csr_cidx_or_nelement);
110  } else {
111  value = View<double**,CLayout, Device>(NoInit("value"), m,width);
112  eindex = View<int**,CLayout, Device>(NoInit("eindex"), m,width);
113  nelement = View<int*,CLayout, Device>(NoInit("nelement"), m);
114  array_deep_copy(eindex, csr_ridx_or_eindex);
115  array_deep_copy(value, values);
116  array_deep_copy(nelement, csr_cidx_or_nelement);
117  }
118  }
119 
120  // Create a mirror with a different device type
121  template<class Device2>
122  inline Matrix<Device2> mirror() const{
123  Matrix<Device2> mtx;
124 
125  mtx.m = m;
126  mtx.n = n;
127  mtx.width = width;
128  mtx.value = my_mirror_view(value, Device2());
129  mirror_copy(mtx.value, value);
130  mtx.eindex = my_mirror_view(eindex, Device2());
131  mirror_copy(mtx.eindex, eindex);
132  mtx.nelement = my_mirror_view(nelement, Device2());
134 
135  mtx.is_csr = is_csr;
136  mtx.nnz = nnz;
137  mtx.csr_ridx = my_mirror_view(csr_ridx, Device2());
139  mtx.csr_v = my_mirror_view(csr_v, Device2());
140  mirror_copy(mtx.csr_v, csr_v);
141  mtx.csr_cidx = my_mirror_view(csr_cidx, Device2());
143  return mtx;
144  }
145 
152  if(is_csr){
153  printf("\nWarning: Trying to convert matrix to csr format, but it is already in csr format.\n");
154  return;
155  }
156 
157  //counting nonzero elements
158  is_csr=true;
159 
160  nnz = 0;
161  for (int i=0; i<m; i++){
162  nnz+=nelement(i);
163  }
164 
165  //allocate memory -- 0-based index for C++ compatiblity
166  csr_ridx = View<int*,CLayout, Device>(NoInit("csr_ridx"),m+1);
167  csr_v = View<double*,CLayout, Device>(NoInit("csr_v"),nnz);
168  csr_cidx = View<int*,CLayout, Device>(NoInit("csr_cidx"),nnz);
169 
170  //sorting of eindex is required if element searching is required.
171  //currently unsorted -- maybe soring can speed up matrix multiplication?
172 
173  //assign values
174  csr_ridx(0)=0; // start from 0 - 0 based
175  for (int i=0; i<m; i++){
176  csr_ridx(i+1)=csr_ridx(i)+nelement(i);
177  for (int j=0; j<nelement(i); j++){
178  int loc=csr_ridx(i)+j;
179  csr_cidx(loc)=eindex(i,j) - 1; // eindex is 1-indexed
180  csr_v(loc)=value(i,j);
181  }
182  }
183 
184  //deallocate org data structure
185  value = View<double**,CLayout, Device>();
186  eindex = View<int**,CLayout, Device>();
187  nelement = View<int*,CLayout, Device>();
188  }
189 
190  // i row index
191  // j column index
192  // value_in is value
193  // flag is whether to replace or add
203  KOKKOS_INLINE_FUNCTION void set_value(int i, int j, double value_in, SetValueOpt flag) const{
204  if(is_csr){
205  DEVICE_PRINTF("\n Error: set_value is only allowed when matrix is not in csr format");
206  return;
207  }
208 
209  // Check matrix indices are valid
210  if(i>=m || j>=n || i<0 || j<0){
211  DEVICE_PRINTF("\n Error: Out of bounds access in set_value");
212  return;
213  }
214 
215  // Loop through indices that have already been assigned
216  for (int l=0; l<nelement(i); l++){
217  if(j==eindex(i,l)-1){ // If this index has already been assigned, replace or add the value (eindex is 1-indexed)
218  if(flag==Replace){
219  value(i,l) = value_in;
220  } else {
221  value(i,l) += value_in;
222  }
223  return; //DONE already -- exit the function
224  }
225  }
226 
227  // If the index has not already been assigned, then we need to add the index
228  // First, check that we have room in our mapping to add another index
229  if(nelement(i) < width){
230  int l=nelement(i);
231  eindex(i,l)=j+1; // 1-indexed
232  value(i,l)=value_in;
233  nelement(i) += 1;
234  }else{
235  DEVICE_PRINTF("\nError in set_value: Not enough memory space for matrix.");
236  DEVICE_PRINTF("\nIncrease width. width=%d but nelement(%d)=%d",width,i,nelement(i));
237  }
238  }
239 
240 // private: // Can't be private because there is this cuda error:
241 // The enclosing parent function for an extended __host__ __device__ lambda cannot have private or protected access within its class
242 // There should be some workarounds available if this is a problem.
243 
251  void mult_org(const View<double*,CLayout,Device>& x, const View<double*,Kokkos::LayoutRight,Device>& y) const{
252  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
253  Kokkos::parallel_for("mult_org", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
254  y(i)=0.0;
255  for (int j=0; j<mat.nelement(i); j++){
256  y(i) += mat.value(i,j)*x(mat.eindex(i,j)-1);
257  }
258  });
259  }
260 
268  void mult_csr(const View<double*,CLayout,Device>& x, const View<double*,Kokkos::LayoutRight,Device>& y) const{
269  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
270  Kokkos::parallel_for("mult_csr", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
271  y(i)=0.0;
272  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
273  y(i) += mat.csr_v(j)*x(mat.csr_cidx(j));
274  }
275  });
276  }
277 
278 /* Not ready - may need atomics
279  **
280  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in original data format.
281  *
282  * @param[in] x is the input vector
283  * @param[out] y is the result
284  * @return void
285  *
286  void transpose_mult_org(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
287  Kokkos::deep_copy(y,0.0);
288  for (i=0; i<m; i++){
289  for (j=0; j<nelement(i); j++){
290  int k=eindex(i,j)-1;
291  y(k) += value(i,j)*x(i);
292  }
293  });
294  }
295 
296  **
297  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in CSR format
298  *
299  * @param[in] x is the input vector
300  * @param[out] y is the result
301  * @return void
302  *
303  void transpose_mult_csr(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
304  Kokkos::deep_copy(y,0.0);
305  for (i=0; i<m; i++){
306  for (j=csr_ridx(i); j<csr_ridx(i+1); j++){
307  int k=csr_cidx(j);
308  y(k) += csr_v(j)*x(i);
309  }
310  });
311  }
312 */
313 
321  void mult_tensor_org(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
322  int nv = x.extent(1); // width of tensor
323  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
324  Kokkos::parallel_for("mult_tensor_org", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
325  int k = idx%nv;
326  int i = idx/nv;
327  y(i,k)=0.0;
328  for (int j=0; j<=mat.nelement(i); j++){
329  y(i,k) += mat.value(i,j)*x(mat.eindex(i,j)-1,k);
330  }
331  });
332  }
333 
341  void mult_tensor_csr(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
342  int nv = x.extent(1); // width of tensor
343  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
344  Kokkos::parallel_for("mult_tensor_csr", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
345  int k = idx%nv;
346  int i = idx/nv;
347  y(i,k)=0.0;
348  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
349  y(i,k) += mat.csr_v(j)*x(mat.csr_cidx(j),k);
350  }
351  });
352  }
353 
354  public:
355 
363  void mult(const View<double*,CLayout,Device>& x,const View<double*,Kokkos::LayoutRight,Device>& y) const{
364  if(is_csr){
365  mult_csr(x,y);
366  } else {
367  mult_org(x,y);
368  }
369  }
370 
378 /* void transpose_mult(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
379  if(is_csr){
380  transpose_mult_csr(x,y);
381  } else {
382  transpose_mult_org(x,y);
383  }
384  }*/
385 
393  void mult_tensor(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
394  if(is_csr){
395  mult_tensor_csr(x,y);
396  } else {
397  mult_tensor_org(x,y);
398  }
399  }
400 };
401 
402 #endif
void array_deep_copy(T *array, const Kokkos::View< T *, Kokkos::LayoutRight, Device > &view)
Definition: array_deep_copy.hpp:11
View< int **, CLayout, Device > eindex
column index - 1-indexed!!
Definition: matrix.hpp:30
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
#define DEVICE_PRINTF(...)
Definition: space_settings.hpp:85
void mult_tensor(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:393
Matrix< Device2 > mirror() const
Definition: matrix.hpp:122
Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int *csr_ridx_or_eindex, int *csr_cidx_or_nelement, double *values)
Definition: matrix.hpp:96
int nnz
if in CSR format, number of (nonzero) values
Definition: matrix.hpp:35
View< double *, CLayout, Device > csr_v
value of CSR - nnz
Definition: matrix.hpp:37
Definition: matrix.hpp:18
typename HostType::execution_space exspace
Use execution space where matrix views are allocated.
Definition: matrix.hpp:15
Definition: matrix.hpp:19
int n
of columns (size of each row)
Definition: matrix.hpp:27
idx
Definition: diag_f0_df_port1.hpp:32
View< int *, CLayout, Device > csr_cidx
column index - nnz
Definition: matrix.hpp:38
void mult_tensor_csr(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:341
View< int *, CLayout, Device > csr_ridx
row index for CSR - m+1
Definition: matrix.hpp:36
int m
of rows (size of each column)
Definition: matrix.hpp:26
int width
Definition: matrix.hpp:28
Definition: matrix.hpp:13
bool is_csr
Whether the matrix is in CSR format.
Definition: matrix.hpp:34
Matrix()
Definition: matrix.hpp:42
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void convert_org_to_csr()
Definition: matrix.hpp:151
Matrix(int m_in, int n_in, int w_in)
Definition: matrix.hpp:51
KOKKOS_INLINE_FUNCTION void set_value(int i, int j, double value_in, SetValueOpt flag) const
Definition: matrix.hpp:203
Type
Definition: matrix.hpp:22
SetValueOpt
Definition: matrix.hpp:17
void mult(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:363
View< int *, CLayout, Device > nelement
of non-zero element of each row
Definition: matrix.hpp:31
Matrix(Type matrix_type, int m_in)
Definition: matrix.hpp:67
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:68
Definition: matrix.hpp:23
void mult_org(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:251
View< double **, CLayout, Device > value
matrix value
Definition: matrix.hpp:29
void mult_tensor_org(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:321
void mult_csr(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:268