1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_ 
   55   template<
typename NumericT, 
unsigned int AlignmentV>
 
   65                                            viennacl::traits::opencl_handle(x),
 
   67                                            cl_uint(info_selector)
 
   81 template<
typename NumericT, 
unsigned int AlignmentV>
 
   91   bool with_alpha_beta = (alpha < NumericT(1) || alpha > 
NumericT(1)) || (beta < 0 || beta > 0);
 
   96   unsigned int alignment = AlignmentV; 
 
   97   if (use_nvidia_specific)
 
  124   if (alignment == 4 || alignment == 8)
 
  144     if (use_nvidia_specific)
 
  190 template< 
typename NumericT, 
unsigned int AlignmentV>
 
  201                            viennacl::traits::opencl_handle(d_A),
 
  206                            viennacl::traits::opencl_handle(y),
 
  222 template<
typename NumericT, 
unsigned int AlignmentV>
 
  235                            viennacl::traits::opencl_handle(d_A.lhs()),
 
  240                            viennacl::traits::opencl_handle(y),
 
  256 template<
typename NumericT, 
unsigned int AlignmentV>
 
  274                             viennacl::traits::opencl_handle(upper_bound_nonzeros_per_row_A)
 
  278   unsigned int * upper_bound_nonzeros_per_row_A_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(upper_bound_nonzeros_per_row_A.
handle());
 
  280   unsigned int max_nnz_per_row_A = 0;
 
  281   for (std::size_t i=0; i<upper_bound_nonzeros_per_row_A.
size(); ++i)
 
  282     max_nnz_per_row_A = 
std::max(max_nnz_per_row_A, upper_bound_nonzeros_per_row_A_ptr[i]);
 
  284   if (max_nnz_per_row_A > 32)
 
  287     unsigned int max_entries_in_G = 32;
 
  288     if (max_nnz_per_row_A <= 256)
 
  289       max_entries_in_G = 16;
 
  290     if (max_nnz_per_row_A <= 64)
 
  291       max_entries_in_G = 8;
 
  296                                          cl_uint(max_entries_in_G),
 
  297                                          viennacl::traits::opencl_handle(exclusive_scan_helper)
 
  302     unsigned int augmented_size = exclusive_scan_helper[A.
size1()];
 
  310     viennacl::ocl::enqueue(k_fill_A2(A2.handle1().opencl_handle(), A2.handle2().opencl_handle(), A2.handle().opencl_handle(), cl_uint(A2.size1()),
 
  311                                      viennacl::traits::opencl_handle(exclusive_scan_helper)
 
  316     viennacl::ocl::enqueue(k_fill_G1(G1.handle1().opencl_handle(), G1.handle2().opencl_handle(), G1.handle().opencl_handle(), cl_uint(G1.size1()),
 
  318                                      cl_uint(max_entries_in_G),
 
  319                                      viennacl::traits::opencl_handle(exclusive_scan_helper)
 
  347   unsigned int current_offset = 0;
 
  348   for (std::size_t i=0; i<C.
size1(); ++i)
 
  350     unsigned int tmp = row_buffer[i];
 
  351     row_buffer.set(i, current_offset);
 
  352     current_offset += tmp;
 
  354   row_buffer.
set(C.
size1(), current_offset);
 
  362   C.
reserve(current_offset, 
false);
 
  381 template<
typename NumericT, 
unsigned int MAT_AlignmentV>
 
  393                            viennacl::traits::opencl_handle(x),
 
  404 template<
typename NumericT, 
unsigned int AlignmentV>
 
  417                            viennacl::traits::opencl_handle(x),
 
  429 template<
typename NumericT, 
unsigned int AlignmentV>
 
  441                            viennacl::traits::opencl_handle(x),
 
  452 template<
typename NumericT, 
unsigned int AlignmentV>
 
  465                            viennacl::traits::opencl_handle(x),
 
  482   template<
typename NumericT, 
unsigned int AlignmentV>
 
  497                                               L.lhs().handle2().opencl_handle(),
 
  498                                               L.lhs().handle().opencl_handle(),
 
  499                                               block_indices.opencl_handle(),
 
  501                                               static_cast<cl_uint
>(x.
size())));
 
  505   template<
typename NumericT, 
unsigned int AlignmentV>
 
  520                                               U.lhs().handle2().opencl_handle(),
 
  521                                               U.lhs().handle().opencl_handle(),
 
  523                                               block_indices.opencl_handle(),
 
  525                                               static_cast<cl_uint
>(x.
size())));
 
  537 template<
typename NumericT, 
unsigned int AlignmentV>
 
  550   viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
 
  551                            viennacl::traits::opencl_handle(x),
 
  552                            cl_uint(proxy_L.lhs().size1())
 
  563 template<
typename NumericT, 
unsigned int AlignmentV>
 
  578   k.local_work_size(0, 128);
 
  579   k.global_work_size(0, k.local_work_size());
 
  580   viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
 
  581                            viennacl::traits::opencl_handle(diagonal),
 
  582                            viennacl::traits::opencl_handle(x),
 
  583                            cl_uint(proxy_L.lhs().size1())
 
  593 template<
typename NumericT, 
unsigned int AlignmentV>
 
  606   viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
 
  607                            viennacl::traits::opencl_handle(x),
 
  608                            cl_uint(proxy_U.lhs().size1())
 
  619 template<
typename NumericT, 
unsigned int AlignmentV>
 
  634   k.local_work_size(0, 128);
 
  635   k.global_work_size(0, k.local_work_size());
 
  636   viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
 
  637                            viennacl::traits::opencl_handle(diagonal),
 
  638                            viennacl::traits::opencl_handle(x),
 
  639                            cl_uint(proxy_U.lhs().size1())
 
  657 template<
typename NumericT>
 
  668   if (beta < 0 || beta > 0) 
 
  700   template<
typename NumericT, 
unsigned int AlignmentV>
 
  708     unsigned int thread_num = 128; 
 
  714                                            viennacl::traits::opencl_handle(x),
 
  715                                            cl_uint(info_selector),
 
  729 template<
typename NumericT, 
unsigned int AlignmentV>
 
  739   if (beta < 0 || beta > 0) 
 
  759   unsigned int thread_num = 128; 
 
  766                            viennacl::traits::opencl_handle(x),
 
  769                            viennacl::traits::opencl_handle(y),
 
  786 template<
typename NumericT, 
unsigned int AlignmentV>
 
  799   unsigned int thread_num = 128; 
 
  804                            viennacl::traits::opencl_handle(d_A),
 
  809                            viennacl::traits::opencl_handle(y),
 
  827 template<
typename NumericT, 
unsigned int AlignmentV>
 
  842   unsigned int thread_num = 128; 
 
  847                            viennacl::traits::opencl_handle(d_A),
 
  852                            viennacl::traits::opencl_handle(y),
 
  867 template<
typename NumericT, 
unsigned int AlignmentV>
 
  880   bool with_alpha_beta = (alpha < NumericT(1) || alpha > 
NumericT(1)) || (beta < 0 || beta > 0);
 
  894   std::stringstream ss;
 
  895   ss << 
"vec_mul_" << 1;
 
  898   unsigned int thread_num = 128;
 
  899   unsigned int group_num = 256;
 
  906                              A.
handle().opencl_handle(),
 
  907                              viennacl::traits::opencl_handle(x),
 
  910                              viennacl::traits::opencl_handle(y),
 
  922                              A.
handle().opencl_handle(),
 
  923                              viennacl::traits::opencl_handle(x),
 
  925                              viennacl::traits::opencl_handle(y),
 
  947 template<
typename NumericT, 
unsigned int AlignmentV>
 
  964                            cl_uint(sp_A.
size1()),
 
  965                            cl_uint(sp_A.
size2()),
 
  969                            viennacl::traits::opencl_handle(d_A),
 
  974                            viennacl::traits::opencl_handle(y),
 
  992 template<
typename NumericT, 
unsigned int AlignmentV>
 
 1011                            cl_uint(sp_A.
size1()),
 
 1012                            cl_uint(sp_A.
size2()),
 
 1016                            viennacl::traits::opencl_handle(d_A.lhs()),
 
 1021                            viennacl::traits::opencl_handle(y),
 
 1034 template<
typename ScalarT, 
typename IndexT>
 
 1047   bool with_alpha_beta = (alpha < ScalarT(1) || alpha > ScalarT(1)) || (beta < 0 || beta > 0);
 
 1061   std::stringstream ss;
 
 1062   ss << 
"vec_mul_" << 1;
 
 1066   unsigned int group_num = 256;
 
 1074   if (with_alpha_beta)
 
 1078                              A.
handle().opencl_handle(),
 
 1079                              viennacl::traits::opencl_handle(x),
 
 1082                              viennacl::traits::opencl_handle(y),
 
 1091                              A.
handle().opencl_handle(),
 
 1092                              viennacl::traits::opencl_handle(x),
 
 1094                              viennacl::traits::opencl_handle(y),
 
 1105 template<
typename NumericT, 
unsigned int AlignmentV>
 
 1118   bool with_alpha_beta = (alpha < NumericT(1) || alpha > 
NumericT(1)) || (beta < 0 || beta > 0);
 
 1134   if (with_alpha_beta)
 
 1136                              A.
handle().opencl_handle(),
 
 1140                              viennacl::traits::opencl_handle(x),
 
 1143                              viennacl::traits::opencl_handle(y),
 
 1154                              A.
handle().opencl_handle(),
 
 1158                              viennacl::traits::opencl_handle(x),
 
 1160                              viennacl::traits::opencl_handle(y),
 
 1170 template<
typename NumericT, 
unsigned int AlignmentV>
 
 1181                            A.
handle().opencl_handle(),
 
 1189                            viennacl::traits::opencl_handle(d_A),
 
 1194                            viennacl::traits::opencl_handle(y),
 
 1203 template<
typename NumericT, 
unsigned int AlignmentV>
 
 1216                            A.
handle().opencl_handle(),
 
 1224                            viennacl::traits::opencl_handle(d_A.lhs()),
 
 1229                            viennacl::traits::opencl_handle(y),
 
const vcl_size_t & size2() const 
Returns the number of columns. 
vcl_size_t internal_ellnnz() const 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
cl_uint stride
Increment between integers. 
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::device const & current_device() const 
Returns the current device. 
Helper class implementing an array on the host. Default case: No conversion necessary. 
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'. 
Represents an OpenCL device within ViennaCL. 
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Implementations of NMF operations using OpenCL. 
void prod_impl(const matrix_base< NumericT > &mat, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication. 
const handle_type & handle3() const 
const vcl_size_t & size1() const 
Returns the number of rows. 
const handle_type & handle2() const 
Returns the OpenCL handle to the column index array. 
Represents an OpenCL kernel within ViennaCL. 
cl_uint start
Starting value of the integer stride. 
const handle_type & handle1() const 
Returns the OpenCL handle to the row index array. 
const handle_type & handle() const 
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const 
Returns the local work size at the respective dimension. 
const handle_type & handle12() const 
Returns the OpenCL handle to the (row, column) index array. 
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
A tag class representing a lower triangular matrix. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for coordinate_matrix. 
vcl_size_t internal_size1() const 
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL. 
result_of::size_type< T >::type start1(T const &obj)
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM. 
const handle_type & handle4() const 
cl_uint vendor_id() const 
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector. 
T max(const T &lhs, const T &rhs)
Maximum. 
vcl_size_t rows_per_block() const 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
const handle_type & handle() const 
Returns the OpenCL handle to the matrix entry array. 
const handle_type & handle1() const 
Returns the OpenCL handle to the row index array. 
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding. 
vcl_size_t internal_size1() const 
Common implementations shared by OpenCL-based operations. 
const vcl_size_t & nnz() const 
Returns the number of nonzero entries. 
Main kernel class for generating OpenCL kernels for ell_matrix. 
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
const handle_type & handle2() const 
const handle_type & handle() const 
Returns the OpenCL handle to the matrix entry array. 
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument. 
Main kernel class for generating OpenCL kernels for compressed_matrix (except solvers). 
Sparse matrix class using the ELLPACK format for storing the nonzeros. 
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for compressed_matrix operations. 
A tag class representing an upper triangular matrix. 
OpenCL kernel file for ell_matrix operations. 
Sparse matrix class using the sliced ELLPACK with parameters C, $\sigma$. 
void clear()
Resets all entries to zero. 
const handle_type & handle3() const 
Returns the OpenCL handle to the row index array. 
Implementation of a smart-pointer-like class for handling OpenCL handles. 
result_of::size_type< T >::type start(T const &obj)
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows c...
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
const handle_type & handle2() const 
Returns the OpenCL handle to the column index array. 
static void init(viennacl::ocl::context &ctx)
Main kernel class for triangular solver OpenCL kernels for compressed_matrix. 
Main kernel class for generating OpenCL kernels for ell_matrix. 
Common routines for single-threaded or OpenMP-enabled execution on CPU. 
OpenCL kernel file for sliced_ell_matrix operations. 
vcl_size_t maxnnz() const 
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
const handle_type & handle3() const 
Returns the OpenCL handle to the group start index array. 
OpenCL kernel file for hyb_matrix operations. 
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B. 
const handle_type & handle3() const 
Returns the OpenCL handle to the row block array. 
void clear()
Resets all entries to zero. Does not change the size of the vector. 
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
const handle_type & handle() const 
Returns the OpenCL handle to the matrix entry array. 
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
Representation of an OpenCL kernel in ViennaCL. 
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for vector operations. 
void set(vcl_size_t index, U value)
size_type size() const 
Returns the length of the vector (cf. std::vector) 
const vcl_size_t & nnz1() const 
Returns the number of nonzero entries. 
vcl_size_t ell_nnz() const 
A tag class representing a lower triangular matrix with unit diagonal. 
size_type global_work_size(int index=0) const 
Returns the global work size at the respective dimension. 
OpenCL kernel file for coordinate_matrix operations. 
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
A tag class representing transposed matrices. 
vcl_size_t raw_size() const 
Returns the number of bytes of the currently active buffer. 
A sparse square matrix in compressed sparse rows format. 
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan. 
const handle_type & handle5() const 
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
const vcl_size_t & blocks1() const 
Returns the internal number of row blocks for an adaptive SpMV. 
vcl_size_t internal_maxnnz() const 
Implementation of the ViennaCL scalar class. 
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix. 
const handle_type & handle() const 
Returns the memory handle. 
static void init(viennacl::ocl::context &ctx)
size_t max_work_group_size() const 
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
A tag class representing an upper triangular matrix with unit diagonal. 
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride. 
Main kernel class for generating OpenCL kernels for hyb_matrix. 
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void switch_memory_context(viennacl::context new_ctx)
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)