XGCa
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix.hpp
Go to the documentation of this file.
1 #ifndef MATRIX_HPP
2 #define MATRIX_HPP
3 
4 #include "space_settings.hpp"
5 #include "array_deep_copy.hpp"
6 #include "my_mirror_view.hpp"
7 #include "checkpoint.hpp"
8 
9 /* The Matrix class stores a sparse matrix. It is initialized in a fixed-width per-row ("org") format
10  * (each row holds up to `width` value/column-index pairs) but can be converted to CSR (Compressed
11  * Sparse Row) with the convert_org_to_csr method.
12  * */
12 template<class Device>
13 class Matrix{
14 
15  using exspace = typename Device::execution_space;
16  public: // temporarily public, for testing
19  Add=1
20  };
21 
22  enum Type{
24  };
25 
26  int m;
27  int n;
28  int width; //< max of non-zero element of each row
29  View<double**,CLayout, Device> value;
30  View<int**,CLayout, Device> eindex;
31  View<int*,CLayout, Device> nelement;
32 
33  // use compressed sparse row matrix data structure
34  bool is_csr;
35  int nnz;
36  View<int*,CLayout, Device> csr_ridx;
37  View<double*,CLayout, Device> csr_v;
38  View<int*,CLayout, Device> csr_cidx;
39 
40  public:
41 
42  Matrix(){}
43 
51  Matrix(int m_in,int n_in,int w_in)
52  : m(m_in),
53  n(n_in),
54  width(w_in),
55  value("value",m_in,w_in), // init to zero
56  eindex(NoInit("eindex"), m_in,w_in),
57  nelement("nelement",m_in), // init to zero
58  is_csr(false) // start from fixed width matrix format
59  {
60  Kokkos::deep_copy(eindex, -1); // For debugging; no initialization should be necessary
61  }
62 
67  Matrix(Type matrix_type, int m_in)
68  : m(m_in),
69  n(m_in),
70  width(1),
71  value("value",m,width), // init to zero
72  eindex(NoInit("eindex"),m,width),
73  nelement("nelement",m), // init to zero
74  is_csr(false) // start from fixed width matrix format
75  {
76  for(int i=0; i<m; i++){
77  set_value(i,i,1.0,SetValueOpt::Replace);
78  }
79 
80  // Convert to CSR matrix to save memory
82  }
83 
96  Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int* csr_ridx_or_eindex, int* csr_cidx_or_nelement, double* values)
97  : m(m_in),
98  n(n_in),
99  width(w_in),
100  nnz(nnz_in),
101  is_csr(is_csr_in)
102  {
103  if(is_csr){
104  csr_ridx = View<int*,CLayout, Device>(NoInit("csr_ridx"),m+1);
105  csr_v = View<double*,CLayout, Device>(NoInit("csr_v"),nnz);
106  csr_cidx = View<int*,CLayout, Device>(NoInit("csr_cidx"),nnz);
107  array_deep_copy(csr_ridx, csr_ridx_or_eindex);
108  array_deep_copy(csr_v, values);
109  array_deep_copy(csr_cidx, csr_cidx_or_nelement);
110  } else {
111  value = View<double**,CLayout, Device>(NoInit("value"), m,width);
112  eindex = View<int**,CLayout, Device>(NoInit("eindex"), m,width);
113  nelement = View<int*,CLayout, Device>(NoInit("nelement"), m);
114  array_deep_copy(eindex, csr_ridx_or_eindex);
115  array_deep_copy(value, values);
116  array_deep_copy(nelement, csr_cidx_or_nelement);
117  }
118  }
119 
120  // Create a mirror with a different device type
121  template<class Device2>
122  inline Matrix<Device2> mirror() const{
123  Matrix<Device2> mtx;
124 
125  mtx.m = m;
126  mtx.n = n;
127  mtx.width = width;
128  mtx.value = my_mirror_view(value, Device2());
129  mirror_copy(mtx.value, value);
130  mtx.eindex = my_mirror_view(eindex, Device2());
131  mirror_copy(mtx.eindex, eindex);
132  mtx.nelement = my_mirror_view(nelement, Device2());
134 
135  mtx.is_csr = is_csr;
136  mtx.nnz = nnz;
137  mtx.csr_ridx = my_mirror_view(csr_ridx, Device2());
139  mtx.csr_v = my_mirror_view(csr_v, Device2());
140  mirror_copy(mtx.csr_v, csr_v);
141  mtx.csr_cidx = my_mirror_view(csr_cidx, Device2());
143  return mtx;
144  }
145 
152  if(is_csr){
153  printf("\nWarning: Trying to convert matrix to csr format, but it is already in csr format.\n");
154  return;
155  }
156 
157  //counting nonzero elements
158  is_csr=true;
159 
160  nnz = 0;
161  for (int i=0; i<m; i++){
162  nnz+=nelement(i);
163  }
164 
165  //allocate memory -- 0-based index for C++ compatiblity
166  csr_ridx = View<int*,CLayout, Device>(NoInit("csr_ridx"),m+1);
167  csr_v = View<double*,CLayout, Device>(NoInit("csr_v"),nnz);
168  csr_cidx = View<int*,CLayout, Device>(NoInit("csr_cidx"),nnz);
169 
170  //sorting of eindex is required if element searching is required.
171  //currently unsorted -- maybe soring can speed up matrix multiplication?
172 
173  //assign values
174  csr_ridx(0)=0; // start from 0 - 0 based
175  for (int i=0; i<m; i++){
176  csr_ridx(i+1)=csr_ridx(i)+nelement(i);
177  for (int j=0; j<nelement(i); j++){
178  int loc=csr_ridx(i)+j;
179  csr_cidx(loc)=eindex(i,j) - 1; // eindex is 1-indexed
180  csr_v(loc)=value(i,j);
181  }
182  }
183 
184  //deallocate org data structure
185  value = View<double**,CLayout, Device>();
186  eindex = View<int**,CLayout, Device>();
187  nelement = View<int*,CLayout, Device>();
188  }
189 
190  // i row index
191  // j column index
192  // value_in is value
193  // flag is whether to replace or add
203  KOKKOS_INLINE_FUNCTION void set_value(int i, int j, double value_in, SetValueOpt flag) const{
204  if(is_csr){
205  DEVICE_PRINTF("\n Error: set_value is only allowed when matrix is not in csr format");
206  return;
207  }
208 
209  // Check matrix indices are valid
210  if(i>=m || j>=n || i<0 || j<0){
211  DEVICE_PRINTF("\n Error: Out of bounds access in set_value");
212  return;
213  }
214 
215  // Loop through indices that have already been assigned
216  for (int l=0; l<nelement(i); l++){
217  if(j==eindex(i,l)-1){ // If this index has already been assigned, replace or add the value (eindex is 1-indexed)
218  if(flag==Replace){
219  value(i,l) = value_in;
220  } else {
221  value(i,l) += value_in;
222  }
223  return; //DONE already -- exit the function
224  }
225  }
226 
227  // If the index has not already been assigned, then we need to add the index
228  // First, check that we have room in our mapping to add another index
229  if(nelement(i) < width){
230  int l=nelement(i);
231  eindex(i,l)=j+1; // 1-indexed
232  value(i,l)=value_in;
233  nelement(i) += 1;
234  }else{
235  DEVICE_PRINTF("\nError in set_value: Not enough memory space for matrix.");
236  DEVICE_PRINTF("\nIncrease width. width=%d but nelement(%d)=%d",width,i,nelement(i));
237  }
238  }
239 
240 // private: // Can't be private because there is this cuda error:
241 // The enclosing parent function for an extended __host__ __device__ lambda cannot have private or protected access within its class
242 // There should be some workarounds available if this is a problem.
243 
251  void mult_org(const View<double*,CLayout,Device>& x, const View<double*,Kokkos::LayoutRight,Device>& y) const{
252  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
253  Kokkos::parallel_for("mult_org", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
254  y(i)=0.0;
255  for (int j=0; j<mat.nelement(i); j++){
256  y(i) += mat.value(i,j)*x(mat.eindex(i,j)-1);
257  }
258  });
259  }
260 
268  void mult_csr(const View<double*,CLayout,Device>& x, const View<double*,Kokkos::LayoutRight,Device>& y) const{
269  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
270  Kokkos::parallel_for("mult_csr", Kokkos::RangePolicy<exspace>(0,m), KOKKOS_LAMBDA( const int i ){
271  y(i)=0.0;
272  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
273  y(i) += mat.csr_v(j)*x(mat.csr_cidx(j));
274  }
275  });
276  }
277 
278 /* Not ready - may need atomics
279  **
280  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in original data format.
281  *
282  * @param[in] x is the input vector
283  * @param[out] y is the result
284  * @return void
285  *
286  void transpose_mult_org(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
287  Kokkos::deep_copy(y,0.0);
288  for (i=0; i<m; i++){
289  for (j=0; j<nelement(i); j++){
290  int k=eindex(i,j)-1;
291  y(k) += value(i,j)*x(i);
292  }
293  });
294  }
295 
296  **
297  * Matrix multiplication using the transpose, i.e.: y = A^T x. Matrix is in CSR format
298  *
299  * @param[in] x is the input vector
300  * @param[out] y is the result
301  * @return void
302  *
303  void transpose_mult_csr(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
304  Kokkos::deep_copy(y,0.0);
305  for (i=0; i<m; i++){
306  for (j=csr_ridx(i); j<csr_ridx(i+1); j++){
307  int k=csr_cidx(j);
308  y(k) += csr_v(j)*x(i);
309  }
310  });
311  }
312 */
313 
321  void mult_tensor_org(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
322  int nv = x.extent(1); // width of tensor
323  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
324  Kokkos::parallel_for("mult_tensor_org", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
325  int k = idx%nv;
326  int i = idx/nv;
327  y(i,k)=0.0;
328  for (int j=0; j<=mat.nelement(i); j++){
329  y(i,k) += mat.value(i,j)*x(mat.eindex(i,j)-1,k);
330  }
331  });
332  }
333 
341  void mult_tensor_csr(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
342  int nv = x.extent(1); // width of tensor
343  Matrix<Device> mat = *this; // Make local shallow copy so class members are captured by lambda
344  Kokkos::parallel_for("mult_tensor_csr", Kokkos::RangePolicy<exspace>(0,m*nv), KOKKOS_LAMBDA( const int idx ){
345  int k = idx%nv;
346  int i = idx/nv;
347  y(i,k)=0.0;
348  for (int j=mat.csr_ridx(i); j<mat.csr_ridx(i+1); j++){
349  y(i,k) += mat.csr_v(j)*x(mat.csr_cidx(j),k);
350  }
351  });
352  }
353 
354  public:
355 
363  void mult(const View<double*,CLayout,Device>& x,const View<double*,Kokkos::LayoutRight,Device>& y) const{
364  if(is_csr){
365  mult_csr(x,y);
366  } else {
367  mult_org(x,y);
368  }
369  }
370 
378 /* void transpose_mult(const View<double*,CLayout,Device>& x, View<double*,Kokkos::LayoutRight,Device>& y){
379  if(is_csr){
380  transpose_mult_csr(x,y);
381  } else {
382  transpose_mult_org(x,y);
383  }
384  }*/
385 
393  void mult_tensor(const View<double**,CLayout,Device>& x, const View<double**,Kokkos::LayoutRight,Device>& y) const{
394  if(is_csr){
395  mult_tensor_csr(x,y);
396  } else {
397  mult_tensor_org(x,y);
398  }
399  }
400 };
401 
402 #endif
void array_deep_copy(T *array, const Kokkos::View< T *, Kokkos::LayoutRight, Device > &view)
Definition: array_deep_copy.hpp:11
View< int **, CLayout, Device > eindex
column index - 1-indexed!!
Definition: matrix.hpp:30
void mirror_copy(T1 &view_dest, const T2 &view_src)
Definition: my_mirror_view.hpp:122
#define DEVICE_PRINTF(...)
Definition: space_settings.hpp:85
void mult_tensor(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:393
Matrix< Device2 > mirror() const
Definition: matrix.hpp:122
Matrix(int m_in, int n_in, int w_in, int nnz_in, bool is_csr_in, int *csr_ridx_or_eindex, int *csr_cidx_or_nelement, double *values)
Definition: matrix.hpp:96
int nnz
if in CSR format, number of (nonzero) values
Definition: matrix.hpp:35
View< double *, CLayout, Device > csr_v
value of CSR - nnz
Definition: matrix.hpp:37
Definition: matrix.hpp:18
typename HostType::execution_space exspace
Use execution space where matrix views are allocated.
Definition: matrix.hpp:15
Definition: matrix.hpp:19
int n
of columns (size of each row)
Definition: matrix.hpp:27
idx
Definition: diag_f0_df_port1.hpp:32
View< int *, CLayout, Device > csr_cidx
column index - nnz
Definition: matrix.hpp:38
void mult_tensor_csr(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:341
View< int *, CLayout, Device > csr_ridx
row index for CSR - m+1
Definition: matrix.hpp:36
int m
of rows (size of each column)
Definition: matrix.hpp:26
int width
Definition: matrix.hpp:28
Definition: matrix.hpp:13
bool is_csr
Whether the matrix is in CSR format.
Definition: matrix.hpp:34
Matrix()
Definition: matrix.hpp:42
View< T *, CLayout, Device > my_mirror_view(const View< T *, CLayout, Device > &view, Device nd)
Definition: my_mirror_view.hpp:14
void convert_org_to_csr()
Definition: matrix.hpp:151
Matrix(int m_in, int n_in, int w_in)
Definition: matrix.hpp:51
KOKKOS_INLINE_FUNCTION void set_value(int i, int j, double value_in, SetValueOpt flag) const
Definition: matrix.hpp:203
Type
Definition: matrix.hpp:22
SetValueOpt
Definition: matrix.hpp:17
void mult(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:363
View< int *, CLayout, Device > nelement
of non-zero element of each row
Definition: matrix.hpp:31
Matrix(Type matrix_type, int m_in)
Definition: matrix.hpp:67
void parallel_for(const std::string name, int n_ptl, Function func, Option option, HostAoSoA aosoa_h, DeviceAoSoA aosoa_d)
Definition: streamed_parallel_for.hpp:252
Kokkos::ViewAllocateWithoutInitializing NoInit
Definition: space_settings.hpp:68
Definition: matrix.hpp:23
void mult_org(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:251
View< double **, CLayout, Device > value
matrix value
Definition: matrix.hpp:29
void mult_tensor_org(const View< double **, CLayout, Device > &x, const View< double **, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:321
void mult_csr(const View< double *, CLayout, Device > &x, const View< double *, Kokkos::LayoutRight, Device > &y) const
Definition: matrix.hpp:268