1 #ifndef VIENNACL_MATRIX_PROXY_HPP_ 
    2 #define VIENNACL_MATRIX_PROXY_HPP_ 
   38   template<
typename NumericT, 
typename MatrixT>
 
   44   template<
typename NumericT>
 
   52   template<
typename NumericT, 
typename MatrixT>
 
   58   template<
typename NumericT>
 
   70 template<
typename MatrixType>
 
   71 class matrix_range : 
public matrix_base<typename MatrixType::cpu_value_type>
 
   73   typedef matrix_base<typename MatrixType::cpu_value_type>    base_type;
 
   74   typedef matrix_range<MatrixType>                            self_type;
 
   87                range const & row_range,
 
   94                range const & row_range,
 
  106   using base_type::operator=;
 
  109   template<
typename OtherNumericT, 
typename F>
 
  112   template<
typename OtherNumericT, 
typename F>
 
  115   template<
typename OtherNumericT, 
typename F>
 
  119 template<
typename MatrixType>
 
  127                range const & row_range,
 
  134                range const & row_range,
 
  146 template<
typename CPUMatrixT, 
typename NumericT>
 
  147 void copy(
const CPUMatrixT & cpu_matrix,
 
  152           && 
bool(
"Matrix size mismatch!"));
 
  154   if ( gpu_matrix_range.start2() != 0)
 
  156     std::vector<NumericT> entries(gpu_matrix_range.size2());
 
  159     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  161       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  162         entries[j] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  164       vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
 
  165       vcl_size_t num_entries = gpu_matrix_range.size2();
 
  173     std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
 
  176     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  177       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  178         entries[i*gpu_matrix_range.internal_size2() + j] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  180     vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
 
  181     vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
 
  188 template<
typename CPUMatrixT, 
typename NumericT>
 
  189 void copy(
const CPUMatrixT & cpu_matrix,
 
  194           && 
bool(
"Matrix size mismatch!"));
 
  196   if ( gpu_matrix_range.start1() != 0 ||  gpu_matrix_range.size1() != gpu_matrix_range.size1())
 
  198     std::vector<NumericT> entries(gpu_matrix_range.size1());
 
  201     for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  203       for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  204         entries[i] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  206       vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
 
  207       vcl_size_t num_entries = gpu_matrix_range.size1();
 
  215     std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
 
  218     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  219       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  220         entries[i + j*gpu_matrix_range.internal_size1()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  222     vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
 
  223     vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
 
  237 template<
typename CPUMatrixT, 
typename NumericT>
 
  239           CPUMatrixT & cpu_matrix)
 
  243           && 
bool(
"Matrix size mismatch!"));
 
  245   if ( gpu_matrix_range.start2() != 0)
 
  247     std::vector<NumericT> entries(gpu_matrix_range.size2());
 
  250     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  252       vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
 
  253       vcl_size_t num_entries = gpu_matrix_range.size2();
 
  257       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  258         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[j];
 
  264     std::vector<NumericT> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());
 
  266     vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
 
  270     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  271       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  272         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i*gpu_matrix_range.internal_size2() + j];
 
  279 template<
typename CPUMatrixT, 
typename NumericT>
 
  281           CPUMatrixT & cpu_matrix)
 
  285           && 
bool(
"Matrix size mismatch!"));
 
  287   if ( gpu_matrix_range.start1() != 0)
 
  289     std::vector<NumericT> entries(gpu_matrix_range.size1());
 
  292     for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  294       vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
 
  295       vcl_size_t num_entries = gpu_matrix_range.size1();
 
  299       for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  300         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i];
 
  306     std::vector<NumericT> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());
 
  309     vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
 
  310     vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
 
  314     for (
vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
 
  315       for (
vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
 
  316         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i + j*gpu_matrix_range.internal_size1()];
 
  325 template<
typename MatrixType>
 
  328   assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
 
  334 template<
typename MatrixType>
 
  337   assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of range invalid!"));
 
  361 template<
typename MatrixType>
 
  362 class matrix_slice : 
public matrix_base<typename MatrixType::cpu_value_type>
 
  364   typedef matrix_base<typename MatrixType::cpu_value_type>    base_type;
 
  365   typedef matrix_slice<MatrixType>                            self_type;
 
  378                slice const & row_slice,
 
  385                slice const & row_slice,
 
  397   using base_type::operator=;
 
  400   template<
typename OtherNumericT, 
typename F>
 
  403   template<
typename OtherNumericT, 
typename F>
 
  406   template<
typename OtherNumericT, 
typename F>
 
  410 template<
typename MatrixType>
 
  418                slice const & row_slice,
 
  425                slice const & row_slice,
 
  438 template<
typename CPUMatrixT, 
typename NumericT>
 
  439 void copy(
const CPUMatrixT & cpu_matrix,
 
  444           && 
bool(
"Matrix size mismatch!"));
 
  446   if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
 
  448     vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); 
 
  450     std::vector<NumericT> entries(num_entries);
 
  453     for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
 
  455       vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
 
  458       for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
 
  459         entries[j * gpu_matrix_slice.stride2()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  467 template<
typename CPUMatrixT, 
typename NumericT>
 
  468 void copy(
const CPUMatrixT & cpu_matrix,
 
  473           && 
bool(
"Matrix size mismatch!"));
 
  476   if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
 
  478     vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); 
 
  480     std::vector<NumericT> entries(num_entries);
 
  483     for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
 
  485       vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
 
  489       for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
 
  490         entries[i * gpu_matrix_slice.stride1()] = detail::matrix_access<NumericT>(cpu_matrix, i, j);
 
  505 template<
typename CPUMatrixT, 
typename NumericT>
 
  507           CPUMatrixT & cpu_matrix)
 
  511           && 
bool(
"Matrix size mismatch!"));
 
  513   if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
 
  515     vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); 
 
  517     std::vector<NumericT> entries(num_entries);
 
  520     for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
 
  522       vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
 
  526       for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
 
  527         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[j * gpu_matrix_slice.stride2()];
 
  535 template<
typename CPUMatrixT, 
typename NumericT>
 
  537           CPUMatrixT & cpu_matrix)
 
  541           && 
bool(
"Matrix size mismatch!"));
 
  543   if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) )
 
  545     vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); 
 
  547     std::vector<NumericT> entries(num_entries);
 
  550     for (
vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
 
  552       vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();
 
  556       for (
vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
 
  557         detail::matrix_access<NumericT>(cpu_matrix, i, j) = entries[i * gpu_matrix_slice.stride1()];
 
  567 template<
typename MatrixType>
 
  570   assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
 
  575 template<
typename MatrixType>
 
  578   assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
 
  583 template<
typename MatrixType>
 
  586   assert(r1.
size() <= A.size1() && r2.
size() <= A.size2() && bool(
"Size of slice invalid!"));
 
viennacl::tools::shared_ptr< char > handle_type
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
MatrixType::handle_type handle_type
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
DistanceT difference_type
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'. 
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_range(matrix_range< MatrixType > const &A, range const &row_range, range const &col_range)
Generic size and resize functionality for different vector and matrix types. 
Class for representing strided submatrices of a bigger matrix A. 
self_type & operator=(const self_type &other)
range::size_type size_type
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
MatrixType::value_type value_type
MatrixType::handle_type handle_type
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
MatrixType::handle_type handle_type
size_type stride2() const
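Returns the column stride, i.e. the step between consecutive selected columns (used as start2() + j * stride2() when addressing column j).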
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL. 
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM. 
range::difference_type difference_type
Forward declaration of dense matrix classes. 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
matrix_slice(self_type const &other)
MatrixType::value_type value_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
range::size_type size_type
matrix_range(self_type const &A, range const &row_range, range const &col_range)
result_of::size_type< T >::type start(T const &obj)
matrix_slice(MatrixType const &A, slice const &row_slice, slice const &col_slice)
range::difference_type difference_type
const value_type & const_reference
size_type stride1() const
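Returns the row stride, i.e. the step between consecutive selected rows (used as start1() + i * stride1() when addressing row i).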
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
matrix_range(self_type const &other)
size_type size2() const
Returns the number of columns. 
handle_type & handle()
Returns the OpenCL handle, non-const-version. 
base_type & operator=(viennacl::matrix_slice< viennacl::matrix< OtherNumericT, F > > const &B)
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
size_type size1() const
Returns the number of rows. 
MatrixType::handle_type handle_type
base_type & operator=(viennacl::matrix< OtherNumericT, F > const &B)
matrix_slice(self_type const &A, slice const &row_slice, slice const &col_slice)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Implementation of a slice object for use with proxy objects. 
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
Implementation of a range object for use with proxy objects. 
size_type start2() const
Returns the column offset, i.e. the index of the first column of the proxy within the underlying matrix buffer.
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Class for representing non-strided submatrices of a bigger matrix A. 
NumericT const & matrix_access(MatrixT const &A, vcl_size_t i, vcl_size_t j)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
viennacl::result_of::cpu_value_type< value_type >::type cpu_value_type
const value_type & const_reference
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
A tag for row-major storage of a dense matrix. 
matrix_range(MatrixType const &A, range const &row_range, range const &col_range)
size_type start1() const
Returns the row offset, i.e. the index of the first row of the proxy within the underlying matrix buffer.
base_type & operator=(viennacl::matrix_range< viennacl::matrix< OtherNumericT, F > > const &B)
matrix_slice(matrix_slice< MatrixType > const &A, slice const &row_slice, slice const &col_slice)