1 #ifndef VIENNACL_HYB_MATRIX_HPP_ 
    2 #define VIENNACL_HYB_MATRIX_HPP_ 
   37 template<
typename NumericT, 
unsigned int AlignmentV  >
 
   55 #ifdef VIENNACL_WITH_OPENCL 
   58       ell_coords_.opencl_handle().context(ctx.opencl_context());
 
   59       ell_elements_.opencl_handle().context(ctx.opencl_context());
 
   61       csr_rows_.opencl_handle().context(ctx.opencl_context());
 
   62       csr_cols_.opencl_handle().context(ctx.opencl_context());
 
   63       csr_elements_.opencl_handle().context(ctx.opencl_context());
 
   85     host_elements.resize(1);
 
  112 #if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment 
  113   template<
typename CPUMatrixT>
 
  114   friend void copy(
const CPUMatrixT & cpu_matrix, 
hyb_matrix & gpu_matrix );
 
  116   template<
typename CPUMatrixT, 
typename T, 
unsigned int ALIGN>
 
  135 template<
typename CPUMatrixT, 
typename NumericT, 
unsigned int AlignmentV>
 
  138   assert( (gpu_matrix.size1() == 0 || 
viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && 
bool(
"Size mismatch") );
 
  139   assert( (gpu_matrix.size2() == 0 || 
viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && 
bool(
"Size mismatch") );
 
  141   if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
 
  145     std::vector<vcl_size_t> hist_entries(cpu_matrix.size2() + 1, 0);
 
  147     for (
typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
 
  150       for (
typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
 
  155       hist_entries[num_entries] += 1;
 
  156       max_entries_per_row = 
std::max(max_entries_per_row, num_entries);
 
  160     for (
vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
 
  162       sum += hist_entries[ind];
 
  166         max_entries_per_row = ind;
 
  172     gpu_matrix.ellnnz_ = max_entries_per_row;
 
  173     gpu_matrix.rows_ = cpu_matrix.size1();
 
  174     gpu_matrix.cols_ = cpu_matrix.size2();
 
  176     vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz();
 
  180     std::vector<unsigned int> csr_cols;
 
  182     std::vector<NumericT> ell_elements(nnz);
 
  183     std::vector<NumericT> csr_elements;
 
  187     for (
typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
 
  191       csr_rows.set(row_it.index1(), csr_index);
 
  193       for (
typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
 
  195         if (data_index < max_entries_per_row)
 
  197           ell_coords.
set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
 
  198           ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
 
  202           csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
 
  203           csr_elements.push_back(*col_it);
 
  213     if (csr_cols.empty())
 
  215       csr_cols.push_back(0);
 
  216       csr_elements.push_back(0);
 
  219     csr_rows.
set(csr_rows.size() - 1, csr_index);
 
  221     gpu_matrix.csrnnz_ = csr_cols.
size();
 
  225       csr_cols_for_gpu.
set(i, csr_cols[i]);
 
  242 template<
typename IndexT, 
typename NumericT, 
unsigned int AlignmentV>
 
  243 void copy(std::vector< std::map<IndexT, NumericT> > 
const & cpu_matrix,
 
  247   for (
vcl_size_t i=0; i<cpu_matrix.size(); ++i)
 
  249     if (cpu_matrix[i].
size() > 0)
 
  250       max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
 
  259 template<
typename CPUMatrixT, 
typename NumericT, 
unsigned int AlignmentV>
 
  265   if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
 
  267     std::vector<NumericT> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());
 
  270     std::vector<NumericT> csr_elements(gpu_matrix.csr_nnz());
 
  283       for (
vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
 
  287         NumericT val = ell_elements[offset];
 
  288         if (val <= 0 && val >= 0) 
 
  291         if (ell_coords[offset] >= gpu_matrix.size2())
 
  293           std::cerr << 
"ViennaCL encountered invalid data " << offset << 
" " << ind << 
" " << row << 
" " << ell_coords[offset] << 
" " << gpu_matrix.size2() << std::endl;
 
  297         cpu_matrix(row, ell_coords[offset]) = val;
 
  303         if (val <= 0 && val >= 0) 
 
  306         if (csr_cols[ind] >= gpu_matrix.size2())
 
  308           std::cerr << 
"ViennaCL encountered invalid data " << std::endl;
 
  312         cpu_matrix(
row, csr_cols[ind]) = val;
 
  323 template<
typename NumericT, 
unsigned int AlignmentV, 
typename IndexT>
 
  325           std::vector< std::map<IndexT, NumericT> > & cpu_matrix)
 
  327   if (cpu_matrix.size() == 0)
 
  328     cpu_matrix.resize(gpu_matrix.size1());
 
  330   assert(cpu_matrix.size() == gpu_matrix.size1() && bool(
"Matrix dimension mismatch!"));
 
  347   template<
typename T, 
unsigned int A>
 
  348   struct op_executor<vector_base<T>, 
op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
 
  350     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_base<T>, op_prod> 
const & rhs)
 
  364   template<
typename T, 
unsigned int A>
 
  365   struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
 
  367     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_base<T>, op_prod> 
const & rhs)
 
  381   template<
typename T, 
unsigned int A>
 
  382   struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
 
  384     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_base<T>, op_prod> 
const & rhs)
 
  400   template<
typename T, 
unsigned int A, 
typename LHS, 
typename RHS, 
typename OP>
 
  401   struct op_executor<vector_base<T>, 
op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
 
  403     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
 
  411   template<
typename T, 
unsigned int A, 
typename LHS, 
typename RHS, 
typename OP>
 
  412   struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
 
  414     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
 
  424   template<
typename T, 
unsigned int A, 
typename LHS, 
typename RHS, 
typename OP>
 
  425   struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
 
  427     static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>, 
const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
 
vcl_size_t internal_ellnnz() const 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Helper class implementing an array on the host. Default case: No conversion necessary. 
vcl_size_t element_size() const 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector. 
const handle_type & handle3() const 
const handle_type & handle() const 
void clear()
Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity p...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
vcl_size_t internal_size1() const 
This file provides the forward declarations for the main types used within ViennaCL. 
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM. 
const handle_type & handle4() const 
T max(const T &lhs, const T &rhs)
Maximum. 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
NumericT csr_threshold() const 
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
vcl_size_t csr_nnz() const 
const handle_type & handle2() const 
viennacl::backend::mem_handle handle_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
vcl_size_t internal_size2() const 
scalar< typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT< NumericT >::ResultType > value_type
Implementations of operations using sparse matrices. 
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
viennacl::memory_types memory_type() const 
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void set(vcl_size_t index, U value)
vcl_size_t ell_nnz() const 
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
hyb_matrix(viennacl::context ctx)
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
const handle_type & handle5() const 
void prod_impl(const matrix_base< NumericT > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication. 
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
friend void copy(const CPUMatrixT &cpu_matrix, hyb_matrix< T, ALIGN > &gpu_matrix)
vcl_size_t raw_size() const 
void csr_threshold(NumericT thr)