#ifndef VIENNACL_SLICED_ELL_MATRIX_HPP_
#define VIENNACL_SLICED_ELL_MATRIX_HPP_
/** @brief Sparse matrix class using the sliced ELLPACK format with parameters C, sigma. */
template<typename ScalarT, typename IndexT>
class sliced_ell_matrix

    /** @brief Standard constructor for setting the row and column sizes as well as the block size. */
    sliced_ell_matrix(size_type num_rows, size_type num_cols, size_type num_rows_per_block_ = 0)
      : rows_(num_rows), cols_(num_cols),
        rows_per_block_(num_rows_per_block_) {}
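    // Illustrative layout sketch (derived from the copy() routine further below): with
    // rows_per_block_ == C, consecutive groups of C rows form one block.  Each block is
    // padded to the width of its longest row and stored column-major, so the j-th stored
    // entry of the r-th row within a block sits at  block_start + j * C + r.
    //
    // Example with C = 2 for a 4-row matrix:
    //   rows 0/1 hold 2 and 3 nonzeros -> block 0 occupies 3 * 2 = 6 padded slots
    //   rows 2/3 hold 1 nonzero each   -> block 1 occupies 1 * 2 = 2 padded slots
    // Shorter rows are padded with explicit zeros only up to their own block's width,
    // which is what distinguishes sliced ELLPACK from plain ELLPACK.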
 
#ifdef VIENNACL_WITH_OPENCL
      // inside sliced_ell_matrix(viennacl::context ctx): bind all four buffers to the supplied OpenCL context
      columns_per_block_.opencl_handle().context(ctx.opencl_context());
      column_indices_.opencl_handle().context(ctx.opencl_context());
      block_start_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
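      // Usage sketch (assumptions: OpenCL backend enabled; the name host_matrix is illustrative):
      //
      //   viennacl::context ctx(viennacl::ocl::get_context(0));   // requires VIENNACL_WITH_OPENCL
      //   viennacl::sliced_ell_matrix<float> A(ctx);
      //   viennacl::copy(host_matrix, A);   // host_matrix: any uBLAS-compatible sparse matrix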
 
      std::vector<ScalarT> host_elements(1);   // in clear(): buffers are re-created from minimal host arrays, resetting all entries to zero without changing the matrix size
 
#if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
  template<typename CPUMatrixT>
  friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix & gpu_matrix);
#else
  template<typename CPUMatrixT, typename ScalarT2, typename IndexT2>
  friend void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT2, IndexT2> & gpu_matrix);
#endif
 
template<typename CPUMatrixT, typename ScalarT, typename IndexT>
void copy(CPUMatrixT const & cpu_matrix, sliced_ell_matrix<ScalarT, IndexT> & gpu_matrix)
{
  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
 
  if (gpu_matrix.rows_per_block() == 0)
    gpu_matrix.rows_per_block_ = 32;
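  // A default of 32 rows per block presumably targets the 32-thread warps of NVIDIA GPUs
  // and still divides the 64-wide wavefronts of AMD hardware; any multiple of the device's
  // SIMD width is a reasonable choice here.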
 
    IndexT columns_in_current_block = 0;

    // first pass: find the widest row in each block and accumulate the padded buffer size
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t entries_in_row = 0;
      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        ++entries_in_row;
      columns_in_current_block = std::max(columns_in_current_block, static_cast<IndexT>(entries_in_row));

      // flush at the end of each block of rows_per_block() rows (and after the last row)
      if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)
        || (row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1) )
      {
        total_element_buffer_size += columns_in_current_block * gpu_matrix.rows_per_block();
        columns_in_block_buffer.set(row_it.index1() / gpu_matrix.rows_per_block(), columns_in_current_block);
        columns_in_current_block = 0;
      }
    }
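    // Worked example (illustrative): with rows_per_block() == 32, a block whose widest row
    // holds 5 nonzeros contributes 5 * 32 = 160 padded slots to total_element_buffer_size,
    // independent of how sparse the other 31 rows of that block are.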
 
    gpu_matrix.rows_ = cpu_matrix.size1();
    gpu_matrix.cols_ = cpu_matrix.size2();

    std::vector<ScalarT> elements(total_element_buffer_size, 0);   // host staging buffer; padding slots stay zero
 
    // second pass: write column indices and values; entries are stored column-major within
    // each block, i.e. the j-th stored entries of the block's rows are adjacent in memory
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t row_in_block = row_it.index1() % gpu_matrix.rows_per_block();
      vcl_size_t entry_in_row = 0;

      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      {
        vcl_size_t buffer_index = block_offset + entry_in_row * gpu_matrix.rows_per_block() + row_in_block;
        coords.set(buffer_index, col_it.index2());
        elements[buffer_index] = *col_it;
        ++entry_in_row;
      }

      // at the end of each block: record where the block starts and advance the offset
      if ( (row_it.index1() % gpu_matrix.rows_per_block() == gpu_matrix.rows_per_block() - 1)
        || (row_it.index1() == viennacl::traits::size1(cpu_matrix) - 1) )
      {
        block_start.set(block_index, static_cast<IndexT>(block_offset));
        block_offset += columns_in_block_buffer[block_index] * gpu_matrix.rows_per_block();
        ++block_index;
      }
    }
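    // Worked index example (illustrative): with rows_per_block() == 32, the third stored
    // entry (entry_in_row == 2) of the fifth row inside a block (row_in_block == 4) lands at
    // block_offset + 2*32 + 4.  Entries with the same entry_in_row are therefore contiguous,
    // so GPU threads handling neighbouring rows of a block read neighbouring buffer elements.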
 
template<typename IndexT, typename NumericT, typename IndexT2>
void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,
          sliced_ell_matrix<NumericT, IndexT2> & gpu_matrix)
 
  // determine the number of columns from the largest column index present
  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
  {
    if (cpu_matrix[i].size() > 0)
      max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
  }
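  // Hedged usage sketch for this overload (variable names are illustrative):
  //
  //   std::vector< std::map<unsigned int, double> > host_A(1000);  // row -> {column : value}
  //   host_A[0][17] = 3.14;                                        // A(0,17) = 3.14
  //   viennacl::sliced_ell_matrix<double> A;
  //   viennacl::copy(host_A, A);
  //
  // rbegin() on each row's std::map yields its largest column index, which is how the loop
  // above deduces the number of columns of the matrix.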
 
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
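      // Presumably (the body is not shown in this excerpt) this dispatches to
      // viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs), i.e. it evaluates
      //   y = viennacl::linalg::prod(A, x);
      // for a sliced_ell_matrix A, using a temporary if lhs and rhs.rhs() alias.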
 
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
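      // Presumably evaluates the product into a temporary vector and then adds it to lhs,
      // covering expressions of the form  y += viennacl::linalg::prod(A, x);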
 
  template<typename ScalarT, typename IndexT>
  struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_base<ScalarT>, op_prod> const & rhs)
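      // Same pattern as op_inplace_add above, but subtracting:  y -= viennacl::linalg::prod(A, x);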
 
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_assign, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
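      // Here the right-hand side vector is itself an expression; presumably it is evaluated
      // into a temporary first, and the matrix-vector product is then formed with that
      // temporary, e.g.  y = viennacl::linalg::prod(A, x1 + x2);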
 
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_inplace_add, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
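      // As above with an expression argument, but accumulating:  y += viennacl::linalg::prod(A, x1 + x2);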
 
  template<typename ScalarT, typename IndexT, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<ScalarT>, op_inplace_sub, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> >

    static void apply(vector_base<ScalarT> & lhs, vector_expression<const sliced_ell_matrix<ScalarT, IndexT>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
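      // As above with an expression argument, but subtracting:  y -= viennacl::linalg::prod(A, x1 + x2);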
 