1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP 
    2 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP 
   37 #include "boost/numeric/ublas/vector.hpp" 
   38 #include "boost/numeric/ublas/matrix.hpp" 
   39 #include "boost/numeric/ublas/matrix_proxy.hpp" 
   40 #include "boost/numeric/ublas/storage.hpp" 
   41 #include "boost/numeric/ublas/io.hpp" 
   42 #include "boost/numeric/ublas/matrix_expression.hpp" 
   43 #include "boost/numeric/ublas/detail/matrix_assign.hpp" 
   62 template< 
typename T, 
typename InputIteratorT>
 
   63 void Print(std::ostream & ostr, InputIteratorT it_begin, InputIteratorT it_end)
 
   66   std::string delimiters = 
" ";
 
   67   std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
 
   71 template<
typename VectorT, 
typename MatrixT>
 
   73                     unsigned int start_ind,
 
   74                     std::vector<unsigned int> 
const & I,
 
   75                     std::vector<unsigned int> 
const & J,
 
   78   m.resize(I.size(), J.size(), 
false);
 
   81       m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
 
   84 template<
typename VectorT>
 
   86                              std::vector<cl_uint> & blocks_ind,
 
   87                              std::vector<std::vector<unsigned int> > 
const & g_I,
 
   88                              std::vector<std::vector<unsigned int> > 
const & g_J)
 
   90   typedef typename VectorT::value_type        NumericType;
 
   92   std::vector<boost::numeric::ublas::matrix<NumericType> > com_A_I_J(g_I.size());
 
   95     write_to_block(con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
 
   96     std::cout << com_A_I_J[i] << std::endl;
 
  100 template<
typename VectorT>
 
  102                              std::vector<cl_uint> & block_ind,
 
  103                              std::vector<std::vector<unsigned int> > 
const & g_J)
 
  105   typedef typename VectorT::value_type     NumericType;
 
  107   std::vector<boost::numeric::ublas::vector<NumericType> > com_v(g_J.size());
 
  111     com_v[i].resize(g_J[i].
size());
 
  112     for (
vcl_size_t j = 0; j < g_J[i].size(); ++j)
 
  113       com_v[i](j) = con_v[block_ind[i] + j];
 
  114     std::cout << com_v[i] << std::endl;
 
  130                                 std::vector<std::vector<unsigned int> > 
const & g_J,
 
  132                                 std::vector<cl_uint> & blocks_ind,
 
  133                                 std::vector<cl_uint> & matrix_dims)
 
  138     sz += 
static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
 
  139     matrix_dims[2*i] = 
static_cast<cl_uint
>(g_I[i].size());
 
  140     matrix_dims[2*i + 1] = 
static_cast<cl_uint
>(g_J[i].size());
 
  141     blocks_ind[i+1] = blocks_ind[i] + 
static_cast<cl_uint
>(g_I[i].size()*g_J[i].size());
 
  150 template<
typename SizeT>
 
  151 void get_size(std::vector<std::vector<SizeT> > 
const & inds,
 
  156     size += static_cast<unsigned int>(inds[i].
size());
 
  164 template<
typename SizeT>
 
  166                      std::vector<cl_uint>& start_inds)
 
  169     start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].
size());
 
  181 template<
typename MatrixT, 
typename NumericT>
 
  183               unsigned int beg_ind,
 
  187   for (
vcl_size_t i = beg_ind; i < A.size1(); ++i)
 
  188     res += A(i, beg_ind-1)*A(i, beg_ind-1);
 
  199 template<
typename MatrixT, 
typename VectorT, 
typename NumericT>
 
  202                        unsigned int col_ind,
 
  203                        unsigned int start_ind,
 
  207   for (
unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i)
 
  208     res += A(i, col_ind)*v(i);
 
  217 template<
typename MatrixT, 
typename VectorT>
 
  220                  unsigned int beg_ind)
 
  222   for (
unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i)
 
  223     v(i) = A( i, beg_ind-1);
 
  235 template<
typename MatrixT, 
typename VectorT, 
typename NumericT>
 
  251     mu = std::sqrt(A(j,j)*A(j, j) + sg);
 
  255       v(j) = -sg/(A(j, j) + mu);
 
  257     b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
 
  270 template<
typename MatrixT, 
typename VectorT, 
typename NumericT>
 
  272                                   unsigned int iter_cnt,
 
  279   for (
unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i)
 
  283     for (
unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j)
 
  284       A(j, i) -= b*in_prod_res*v(j);
 
  294 template<
typename MatrixT, 
typename VectorT>
 
  299   for (
unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i)
 
  310 template<
typename MatrixT, 
typename VectorT>
 
  313   typedef typename MatrixT::value_type     NumericType;
 
  315   if ((R.size1() > 0) && (R.size2() > 0))
 
  317     VectorT v = 
static_cast<VectorT
>(boost::numeric::ublas::zero_vector<NumericType>(R.size1()));
 
  318     b_v = 
static_cast<VectorT
>(boost::numeric::ublas::zero_vector<NumericType>(R.size2()));
 
  320     for (
unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i)
 
  337 template<
typename SizeT>
 
  343     if (inds[i].
size() > max_size)
 
  344       max_size = static_cast<SizeT>(inds[i].
size());
 
  354 template<
typename MatrixT, 
typename VectorT, 
typename NumericT>
 
  361   for (
unsigned int j = ind; j < A.size1(); ++j)
 
  366       res += A(j, ind)*v(j);
 
  376 template<
typename MatrixT, 
typename VectorT>
 
  381   typedef typename MatrixT::value_type     
NumericT;
 
  390         y(j) -= b_v(i)*inn_prod;
 
  392         y(j) -= b_v(i)*inn_prod*R(j,i);
 
  403 template<
typename MatrixT, 
typename VectorT>
 
  411     tmp_v = 
static_cast<VectorT
>(
column(A,i));
 
  427 template<
typename NumericT>
 
  428 void block_qr(std::vector<std::vector<unsigned int> > & g_I,
 
  429               std::vector<std::vector<unsigned int> > & g_J,
 
  432               std::vector<cl_uint> & g_is_update,
 
  438   unsigned int bv_size = 0;
 
  439   unsigned int v_size = 0;
 
  442   unsigned int local_r_n = 0;
 
  443   unsigned int local_c_n = 0;
 
  451   std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
 
  452   std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
 
  456   std::vector<NumericT> b_v(bv_size, 
NumericT(0));
 
  457   std::vector<NumericT>   v(v_size,  
NumericT(0));
 
  462                                                static_cast<unsigned int>(
sizeof(
NumericT)*bv_size),
 
  466                                             static_cast<unsigned int>(
sizeof(
NumericT)*v_size),
 
  470                                                 static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
 
  471                                                 &(start_bv_inds[0]));
 
  474                                              static_cast<unsigned int>(
sizeof(cl_uint)*g_I.size()),
 
  477                                                                            static_cast<unsigned int>(
sizeof(cl_uint)*g_is_update.size()),
 
  490                                   static_cast<cl_uint
>(g_I.size())));
 
void householder_vector(MatrixT const &A, unsigned int j, VectorT &v, NumericT &b)
Computation of Householder vector, householder reflection c.f. Gene H. Golub, Charles F...
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Represents contigious matrices on GPU. 
void print_continious_vector(VectorT &con_v, std::vector< cl_uint > &block_ind, std::vector< std::vector< unsigned int > > const &g_J)
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements. 
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices. 
Represents an OpenCL kernel within ViennaCL. 
Implementation of the dense matrix class. 
size_type local_work_size(int index=0) const 
Returns the local work size at the respective dimension. 
OpenCL kernel file for sparse approximate inverse operations. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set. 
void dot_prod(MatrixT const &A, unsigned int beg_ind, NumericT &res)
Dot prod of particular column of martix A with it's self starting at a certain index beg_ind...
void write_to_block(VectorT &con_A_I_J, unsigned int start_ind, std::vector< unsigned int > const &I, std::vector< unsigned int > const &J, MatrixT &m)
void copy_vector(MatrixT const &A, VectorT &v, unsigned int beg_ind)
Copying part of matrix column. 
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224. 
void apply_q_trans_vec(MatrixT const &R, VectorT const &b_v, VectorT &y)
Recovery Q from matrix R and vector of betas b_v. 
Implementation of a bunch of (small) matrices on GPU. Experimental. 
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
void Print(std::ostream &ostr, InputIteratorT it_begin, InputIteratorT it_end)
A class representing local (shared) OpenCL memory. Typically used as kernel argument. 
void store_householder_vector(MatrixT &A, unsigned int ind, VectorT &v)
Storage of vector v in column(A, ind), starting from ind-1 index of a column. 
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set. 
Implementation of a bunch of vectors on GPU. Experimental. 
void custom_inner_prod(MatrixT const &A, VectorT const &v, unsigned int col_ind, unsigned int start_ind, NumericT &res)
Dot prod of particular matrix column with arbitrary vector: A(:, col_ind) 
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions. 
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements. 
Implementations of the OpenCL backend, where all contexts are stored in. 
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix. 
void get_max_block_size(std::vector< std::vector< SizeT > > const &inds, SizeT &max_size)
Getting max size of rows/columns from container of index set. 
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void custom_dot_prod(MatrixT const &A, VectorT const &v, unsigned int ind, NumericT &res)
Dot_prod(column(A, ind), v) starting from index ind+1. 
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Represents a contiguous vector on the GPU to represent a concatentation of small vectors. 
static void init(viennacl::ocl::context &ctx)
size_type global_work_size(int index=0) const 
Returns the global work size at the respective dimension. 
void apply_q_trans_mat(MatrixT const &R, VectorT const &b_v, MatrixT &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
void block_qr(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on GPU. 
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
void print_continious_matrix(VectorT &con_A_I_J, std::vector< cl_uint > &blocks_ind, std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J)
void apply_householder_reflection(MatrixT &A, unsigned int iter_cnt, VectorT &v, NumericT b)
Inplace application of Householder vector to a matrix A. 
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const 
Creates a memory buffer within the context.