1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP 
    2 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP 
   38 #include "boost/numeric/ublas/vector.hpp" 
   39 #include "boost/numeric/ublas/matrix.hpp" 
   40 #include "boost/numeric/ublas/matrix_proxy.hpp" 
   41 #include "boost/numeric/ublas/vector_proxy.hpp" 
   42 #include "boost/numeric/ublas/storage.hpp" 
   43 #include "boost/numeric/ublas/io.hpp" 
   44 #include "boost/numeric/ublas/lu.hpp" 
   45 #include "boost/numeric/ublas/triangular.hpp" 
   46 #include "boost/numeric/ublas/matrix_expression.hpp" 
   79   template<
typename T1, 
typename T2>
 
   80   bool operator()(std::pair<T1, T2> 
const & left, std::pair<T1, T2> 
const & right)
 
   82     return static_cast<double>(left.second) > static_cast<double>(right.second);
 
   93 template<
typename MatrixT>
 
   98   typedef typename MatrixT::value_type        NumericType;
 
  100   vcl_size_t row_n = R_n.size1() - (A.size1() - R.size2());
 
  101   MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(R.size1() + row_n, R.size2() + A.size2());
 
  107                                                                                                             R.size2() + A.size2())) +=
 
  111   if (R_n.size1() > 0 && R_n.size2() > 0)
 
  123 template<
typename VectorT>
 
  127   typedef typename VectorT::value_type          NumericType;
 
  129   VectorT w  = boost::numeric::ublas::zero_vector<NumericType>(v.size() + v_n.size());
 
  140 template<
typename SparseVectorT, 
typename NumericT>
 
  144   for (
typename SparseVectorT::const_iterator vec_it  = v.begin(); vec_it != v.end(); ++vec_it)
 
  145     norm += (vec_it->second)*(vec_it->second);
 
  147   norm = std::sqrt(norm);
 
  156 template<
typename SparseVectorT, 
typename NumericT>
 
  158                        SparseVectorT 
const & 
v2,
 
  161   typename SparseVectorT::const_iterator v_it1 = v1.begin();
 
  162   typename SparseVectorT::const_iterator v_it2 = v2.begin();
 
  164   while ((v_it1 != v1.end())&&(v_it2 != v2.end()))
 
  166     if (v_it1->first == v_it2->first)
 
  168       res_v += (v_it1->second)*(v_it2->second);
 
  172     else if (v_it1->first < v_it2->first)
 
  187 template<
typename SparseVectorT, 
typename NumericT>
 
  189                             SparseVectorT 
const & res,
 
  190                             std::vector<unsigned int> & J,
 
  191                             std::vector<unsigned int> & J_u,
 
  194   std::vector<std::pair<unsigned int, NumericT> > p;
 
  198   for (
typename SparseVectorT::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it)
 
  205       p.push_back(std::pair<unsigned int, NumericT>(res_it->first, (inprod*inprod)/(norm2*norm2)));
 
  210   while ((cur_size < J.size()) && (p.size() > 0))
 
  212     J_u.push_back(p[0].first);
 
  217   return (cur_size > 0);
 
  227 template<
typename SparseVectorT>
 
  229                     std::vector<unsigned int>  
const & I,
 
  230                     std::vector<unsigned int>  
const & J_n,
 
  231                     std::vector<unsigned int>        & I_n)
 
  235     for (
typename SparseVectorT::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it)
 
  238         I_n.push_back(col_it->first);
 
  249 template<
typename MatrixT>
 
  251                         MatrixT 
const & A_I_J_u,
 
  254   typedef typename MatrixT::value_type     NumericType;
 
  256   vcl_size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
 
  261   MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(row_n, col_n);
 
  287 template<
typename SparseMatrixT,
 
  288          typename SparseVectorT,
 
  289          typename DenseMatrixT,
 
  292                   std::vector<SparseVectorT> 
const & A_v_c,
 
  293                   std::vector<SparseVectorT>       & g_res,
 
  294                   std::vector<bool> & g_is_update,
 
  295                   std::vector<std::vector<unsigned int> >& g_I,
 
  296                   std::vector<std::vector<unsigned int> >& g_J,
 
  297                   std::vector<VectorT>      & g_b_v,
 
  298                   std::vector<DenseMatrixT> & g_A_I_J,
 
  301   typedef typename DenseMatrixT::value_type     NumericType;
 
  304   std::vector<std::vector<unsigned int> > g_J_u(g_J.size());   
 
  305   std::vector<std::vector<unsigned int> > g_I_u(g_J.size());   
 
  306   std::vector<DenseMatrixT> g_A_I_J_u(g_J.size());             
 
  307   std::vector<DenseMatrixT> g_A_I_u_J_u(g_J.size());           
 
  308   std::vector<VectorT>      g_b_v_u(g_J.size());               
 
  310 #ifdef VIENNACL_WITH_OPENMP 
  311   #pragma omp parallel for 
  313   for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
 
  315     if (g_is_update[static_cast<vcl_size_t>(i)])
 
  317       if (buildAugmentedIndexSet<SparseVectorT, NumericType>(A_v_c, g_res[static_cast<vcl_size_t>(i)], g_J[
static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], tag))
 
  320         initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], g_I[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
 
  322         apply_q_trans_mat(g_A_I_J[static_cast<vcl_size_t>(i)], g_b_v[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
 
  324         buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
 
  325         initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
 
  327         QRBlockComposition(g_A_I_J[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
 
  329         single_qr(g_A_I_u_J_u[static_cast<vcl_size_t>(i)], g_b_v_u[static_cast<vcl_size_t>(i)]);
 
  331         composeNewR(g_A_I_J_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)], g_A_I_J[static_cast<vcl_size_t>(i)]);
 
  332         composeNewVector(g_b_v_u[static_cast<vcl_size_t>(i)], g_b_v[static_cast<vcl_size_t>(i)]);
 
  339         g_is_update[
static_cast<vcl_size_t>(i)] = 
false;
 
  360 template<
typename NumericT>
 
  362                             std::vector<std::vector<unsigned int> > 
const & g_I,
 
  366                             std::vector<cl_uint> & g_is_update,
 
  370   unsigned int local_r_n = 0;
 
  371   unsigned int local_c_n = 0;
 
  372   unsigned int sz_blocks = 0;
 
  378   std::vector<cl_uint> matrix_dims(g_I.size()*2, 
static_cast<cl_uint
>(0));
 
  379   std::vector<cl_uint> blocks_ind(g_I.size() + 1, 
static_cast<cl_uint
>(0));
 
  384                                                                            static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
 
  395                                         static_cast<cl_uint
>(g_I.size())));
 
  405 template<
typename SizeT>
 
  407                           std::vector<std::vector<SizeT> > 
const & g_J,
 
  408                           std::vector<std::vector<SizeT> > 
const & g_I_u,
 
  409                           std::vector<std::vector<SizeT> >       & g_I_q)
 
  411 #ifdef VIENNACL_WITH_OPENMP 
  412   #pragma omp parallel for 
  414   for (
long i = 0; i < static_cast<long>(g_I.size()); ++i)
 
  416     for (
vcl_size_t j = g_J[static_cast<vcl_size_t>(i)].size(); j < g_I[static_cast<vcl_size_t>(i)].
size(); ++j)
 
  417       g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I[static_cast<vcl_size_t>(i)][j]);
 
  419     for (
vcl_size_t j = 0; j < g_I_u[static_cast<vcl_size_t>(i)].
size(); ++j)
 
  420       g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I_u[static_cast<vcl_size_t>(i)][j]);
 
  438 template<
typename NumericT>
 
  440                        std::vector<std::vector<unsigned int> > 
const& g_I,
 
  441                        std::vector<std::vector<unsigned int> > 
const& g_J_u,
 
  442                        std::vector<std::vector<unsigned int> > 
const& g_I_u,
 
  443                        std::vector<std::vector<unsigned int> >& g_I_q,
 
  447                        std::vector<cl_uint> & g_is_update,
 
  455   unsigned int sz_blocks;
 
  456   std::vector<cl_uint> matrix_dims(g_I.size()*2, 
static_cast<cl_uint
>(0));
 
  457   std::vector<cl_uint> blocks_ind(g_I.size() + 1, 
static_cast<cl_uint
>(0));
 
  461   std::vector<NumericT> con_A_I_J_q(sz_blocks, static_cast<NumericT>(0));
 
  466                                                     static_cast<unsigned int>(
sizeof(
NumericT)*sz_blocks),
 
  471                                                      static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
 
  476                                                       static_cast<unsigned int>(
sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
 
  481                                                                            static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
 
  501                                               static_cast<unsigned int>(g_I.size())));
 
  513                                               static_cast<unsigned int>(g_I.size())));
 
  532 template<
typename NumericT>
 
  533 void assemble_r(std::vector<std::vector<unsigned int> > & g_I,
 
  534                 std::vector<std::vector<unsigned int> > & g_J,
 
  540                 std::vector<cl_uint> & g_is_update,
 
  544   std::vector<cl_uint> matrix_dims(g_I.size()*2, 
static_cast<cl_uint
>(0));
 
  545   std::vector<cl_uint> blocks_ind(g_I.size() + 1, 
static_cast<cl_uint
>(0));
 
  546   std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
 
  547   unsigned int sz_blocks, bv_size;
 
  553   std::vector<NumericT> con_A_I_J_r(sz_blocks, static_cast<NumericT>(0));
 
  554   std::vector<NumericT> b_v_r(bv_size, static_cast<NumericT>(0));
 
  559                                                     static_cast<unsigned int>(
sizeof(
NumericT)*sz_blocks),
 
  564                                                      static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
 
  569                                                      static_cast<unsigned int>(
sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
 
  574                                                  static_cast<unsigned int>(
sizeof(
NumericT)*bv_size),
 
  579                                                   static_cast<unsigned int>(
sizeof(cl_uint)*(g_I.size() + 1)),
 
  580                                                   &(start_bv_r_inds[0]));
 
  584                                                                            static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
 
  595                                           g_is_update_vcl, 
static_cast<cl_uint
>(g_I.size())));
 
  599   bv_assembly_kernel.global_work_size(0, 256);
 
  603                                             static_cast<cl_uint
>(g_I.size())));
 
  624 template<
typename NumericT, 
unsigned int AlignmentV, 
typename SparseVectorT>
 
  626                   std::vector<SparseVectorT> 
const & A_v_c,
 
  627                   std::vector<cl_uint> & g_is_update,
 
  628                   std::vector<SparseVectorT> & g_res,
 
  629                   std::vector<std::vector<unsigned int> > & g_J,
 
  630                   std::vector<std::vector<unsigned int> > & g_I,
 
  637   std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
 
  639   std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
 
  641   std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
 
  650 #ifdef VIENNACL_WITH_OPENMP 
  651   #pragma omp parallel for 
  653   for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
 
  655     if (g_is_update[static_cast<vcl_size_t>(i)])
 
  657       if (buildAugmentedIndexSet<SparseVectorT, NumericT>(A_v_c, g_res[static_cast<vcl_size_t>(i)], g_J[
static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], tag))
 
  658           buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
 
  662   block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block);
 
  664   block_q_multiplication<NumericT>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, ctx);
 
  666   block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block);
 
  667   assemble_qr_block<NumericT>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
 
  668                               g_A_I_u_J_u_vcl, g_is_update, is_empty_block, ctx);
 
  670   block_qr<NumericT>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, ctx);
 
  672 #ifdef VIENNACL_WITH_OPENMP 
  673   #pragma omp parallel for 
  675   for (
long i = 0; i < static_cast<long>(g_J.size()); ++i)
 
  680   assemble_r<NumericT>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl,  g_bv_vcl,  g_bv_u_vcl, g_is_update, ctx);
 
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
bool buildAugmentedIndexSet(std::vector< SparseVectorT > const &A_v_c, SparseVectorT const &res, std::vector< unsigned int > &J, std::vector< unsigned int > &J_u, spai_tag const &tag)
Building a new set of column indices J_u, cf. Kallischko dissertation p.31. 
Represents contigious matrices on GPU. 
Implementations of dense matrix related operations including matrix-vector products. 
Helper functor for comparing std::pair<> based on the second member. 
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements. 
void sparse_norm_2(SparseVectorT const &v, NumericT &norm)
Computation of Euclidean norm for sparse vector. 
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices. 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
bool operator()(std::pair< T1, T2 > const &left, std::pair< T1, T2 > const &right)
Represents an OpenCL kernel within ViennaCL. 
Implementation of the dense matrix class. 
size_type local_work_size(int index=0) const 
Returns the local work size at the respective dimension. 
OpenCL kernel file for sparse approximate inverse operations. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void compute_blocks_size(std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
void get_size(std::vector< std::vector< SizeT > > const &inds, SizeT &size)
Computes size of particular container of index set. 
void assemble_qr_row_inds(std::vector< std::vector< SizeT > > const &g_I, std::vector< std::vector< SizeT > > const &g_J, std::vector< std::vector< SizeT > > const &g_I_u, std::vector< std::vector< SizeT > > &g_I_q)
Assembly of container of index row sets: I_q, row indices for new "QR block". 
Main implementation of SPAI (not FSPAI). Experimental. 
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations. 
void block_update(SparseMatrixT const &A, std::vector< SparseVectorT > const &A_v_c, std::vector< SparseVectorT > &g_res, std::vector< bool > &g_is_update, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, std::vector< VectorT > &g_b_v, std::vector< DenseMatrixT > &g_A_I_J, spai_tag const &tag)
CPU-based dynamic update for SPAI preconditioner. 
viennacl::ocl::context const & context() const 
bool isInIndexSet(std::vector< SizeT > const &J, SizeT ind)
Determines if element ind is in set {J}. 
void buildNewRowSet(std::vector< SparseVectorT > const &A_v_c, std::vector< unsigned int > const &I, std::vector< unsigned int > const &J_n, std::vector< unsigned int > &I_n)
Building a new indices to current set of row indices I_n, cf. Kallischko dissertation p...
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
void single_qr(MatrixT &R, VectorT &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224. 
void block_assembly(viennacl::compressed_matrix< NumericT, AlignmentV > const &A, std::vector< std::vector< unsigned int > > const &g_J, std::vector< std::vector< unsigned int > > const &g_I, block_matrix &g_A_I_J_vcl, std::vector< cl_uint > &g_is_update, bool &is_empty_block)
Assembly of blocks on GPU by a gived set of row indices: g_I and column indices: g_J. 
viennacl::vector< float > v1
Implementation of a bunch of (small) matrices on GPU. Experimental. 
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
A class representing local (shared) OpenCL memory. Typically used as kernel argument. 
void insert(MatrixType &matrix, long row, long col, ScalarType value)
Implementation of a simultaneous QR factorization of multiple matrices. Experimental. 
void composeNewVector(VectorT const &v_n, VectorT &v)
Composition of new vector of coefficients beta from QR factorizations(necessary for Q recovery) ...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
void init_start_inds(std::vector< std::vector< SizeT > > const &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set. 
Implementations of incomplete factorization preconditioners. Convenience header file. 
Implementation of a static SPAI. Experimental. 
void assemble_r(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_matrix &g_A_I_J_u_vcl, block_matrix &g_A_I_u_J_u_vcl, block_vector &g_bv_vcl, block_vector &g_bv_vcl_u, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs assembly for new R matrix on GPU. 
Implementation of the compressed_matrix class. 
Implementation of a bunch of vectors on GPU. Experimental. 
Implementations of operations using sparse matrices. 
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions. 
void assemble_qr_block(std::vector< std::vector< unsigned int > > const &g_J, std::vector< std::vector< unsigned int > > const &g_I, std::vector< std::vector< unsigned int > > const &g_J_u, std::vector< std::vector< unsigned int > > const &g_I_u, std::vector< std::vector< unsigned int > > &g_I_q, block_matrix &g_A_I_J_u_vcl, viennacl::ocl::handle< cl_mem > &matrix_dimensions, block_matrix &g_A_I_u_J_u_vcl, std::vector< cl_uint > &g_is_update, bool is_empty_block, viennacl::context ctx)
Performs assembly for new QR block. 
void initProjectSubMatrix(SparseMatrixT const &A_in, std::vector< unsigned int > const &J, std::vector< unsigned int > &I, DenseMatrixT &A_out)
Initializes a dense matrix from a sparse one. 
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements. 
Implementation of the spai tag holding SPAI configuration parameters. Experimental. 
The conjugate gradient method is implemented here. 
Implementations of the OpenCL backend, where all contexts are stored in. 
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix. 
void sparse_inner_prod(SparseVectorT const &v1, SparseVectorT const &v2, NumericT &res_v)
Dot product of two sparse vectors. 
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
void get_max_block_size(std::vector< std::vector< SizeT > > const &inds, SizeT &max_size)
Getting max size of rows/columns from container of index set. 
viennacl::vector< int > v2
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
void block_q_multiplication(std::vector< std::vector< unsigned int > > const &g_J_u, std::vector< std::vector< unsigned int > > const &g_I, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, block_matrix &g_A_I_J_u_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs multiplication Q'*A(I, \tilde J) on GPU. 
Represents a contiguous vector on the GPU to represent a concatentation of small vectors. 
static void init(viennacl::ocl::context &ctx)
size_type global_work_size(int index=0) const 
Returns the global work size at the respective dimension. 
void apply_q_trans_mat(MatrixT const &R, VectorT const &b_v, MatrixT &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
A sparse square matrix in compressed sparse rows format. 
Implementation of the ViennaCL scalar class. 
void QRBlockComposition(MatrixT const &A_I_J, MatrixT const &A_I_J_u, MatrixT &A_I_u_J_u)
Composition of new block for QR factorization cf. Kallischko dissertation p.82, figure 4...
void composeNewR(MatrixT const &A, MatrixT const &R_n, MatrixT &R)
Composition of new matrix R, that is going to be used in Least Square problem solving. 
double getResidualThreshold() const 
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const 
Creates a memory buffer within the context.