1 #ifndef VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_OPENCL_MATRIX_OPERATIONS_HPP_ 
   59   template<
typename NumericT>
 
   67       KernelClass::init(ctx);
 
   68       program = &ctx.
get_program(KernelClass::program_name());
 
   73       KernelClass::init(ctx);
 
   74       program = &ctx.
get_program(KernelClass::program_name());
 
   79   template<
typename NumericT>
 
   87       KernelClass::init(ctx);
 
   88       program = &ctx.
get_program(KernelClass::program_name());
 
   93       KernelClass::init(ctx);
 
   94       program = &ctx.
get_program(KernelClass::program_name());
 
   99   template<
typename NumericT>
 
  107       KernelClass::init(ctx);
 
  108       program = &ctx.
get_program(KernelClass::program_name());
 
  113       KernelClass::init(ctx);
 
  114       program = &ctx.
get_program(KernelClass::program_name());
 
  133 template<
typename DestNumericT, 
typename SrcNumericT>
 
  136   assert(dest.
row_major() == src.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
 
  138   assert(viennacl::traits::opencl_handle(dest).
context() == viennacl::traits::opencl_handle(src).
context() && 
bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  140   std::string kernel_name(
"convert_");
 
  141   kernel_name += dest.
row_major() ? 
"row_" : 
"col_";
 
  174                           viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
 
  176                           viennacl::traits::opencl_handle(mat2),
 
  186           typename ScalarT1, 
typename ScalarT2>
 
  191   std::string kernel_name;
 
  193     kernel_name = 
"ambm_cpu_cpu";
 
  195     kernel_name = 
"ambm_cpu_gpu";
 
  197     kernel_name = 
"ambm_gpu_cpu";
 
  199     kernel_name = 
"ambm_gpu_gpu";
 
  212                           viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
 
  214                           viennacl::traits::opencl_handle(mat2),
 
  219                           viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
 
  221                           viennacl::traits::opencl_handle(mat3),
 
  231           typename ScalarT1, 
typename ScalarT2>
 
  236   std::string kernel_name;
 
  238     kernel_name = 
"ambm_m_cpu_cpu";
 
  240     kernel_name = 
"ambm_m_cpu_gpu";
 
  242     kernel_name = 
"ambm_m_gpu_cpu";
 
  244     kernel_name = 
"ambm_m_gpu_gpu";
 
  257                           viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
 
  259                           viennacl::traits::opencl_handle(mat2),
 
  264                           viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(beta)),
 
  266                           viennacl::traits::opencl_handle(mat3),
 
  275           typename SizeT, 
typename DistanceT>
 
  279   std::string kernel_name(
"trans_kernel");
 
  282                                 static_cast<cl_uint
>(proxy.lhs().start1()),         static_cast<cl_uint>(proxy.lhs().start2()),
 
  283                                 static_cast<cl_uint>(proxy.lhs().internal_size1()), static_cast<cl_uint>(proxy.lhs().internal_size2()),
 
  284                                 static_cast<cl_uint>(proxy.lhs().size1()),          static_cast<cl_uint>(proxy.lhs().size2()),
 
  285                                 static_cast<cl_uint>(proxy.lhs().stride1()),        static_cast<cl_uint>(proxy.lhs().stride2()),
 
  288                                 static_cast<cl_uint>(temp_trans.
start1()),         static_cast<cl_uint>(temp_trans.
start2()),
 
  290                                 static_cast<cl_uint>(temp_trans.
stride1()),        static_cast<cl_uint>(temp_trans.
stride2())));
 
  293 template <
typename NumericT>
 
  305                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
 
  310 template <
typename NumericT>
 
  319                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(s))
 
  324 template <
typename NumericT>
 
  335   KernelClass::init(ctx);
 
  337   cl_uint options_alpha = 0;
 
  378                               viennacl::traits::opencl_handle(
NumericT(1)),
 
  380                               viennacl::traits::opencl_handle(vec),
 
  385 template <
typename NumericT>
 
  391   KernelClass::init(ctx);
 
  393   cl_uint options_alpha = 0;
 
  435                               viennacl::traits::opencl_handle(
NumericT(1)),
 
  437                               viennacl::traits::opencl_handle(mat),
 
  442 template <
typename NumericT>
 
  448   KernelClass::init(ctx);
 
  450   cl_uint options_alpha = 0;
 
  478                               viennacl::traits::opencl_handle(
NumericT(1)),
 
  480                               viennacl::traits::opencl_handle(mat),
 
  485 template <
typename NumericT>
 
  491   KernelClass::init(ctx);
 
  493   cl_uint options_alpha = 0;
 
  521                               viennacl::traits::opencl_handle(
NumericT(1)),
 
  523                               viennacl::traits::opencl_handle(mat),
 
  539 template <
typename T, 
typename OP>
 
  543   assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  544   assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  560                           viennacl::traits::opencl_handle(proxy.lhs()),
 
  565                           viennacl::traits::opencl_handle(proxy.rhs()),
 
  582 template <
typename T, 
typename OP>
 
  586   assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  587   assert(viennacl::traits::opencl_handle(A).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Matrices do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  597                            viennacl::traits::opencl_handle(proxy.lhs()),
 
  619 template <
typename NumericT>
 
  634                           viennacl::traits::opencl_handle(vec),
 
  639                           viennacl::traits::opencl_handle(result),
 
  657 template<
typename NumericT, 
typename ScalarType >
 
  664     bool effective_A_trans = A_trans ^ A.
row_major();
 
  665     bool effective_B_trans = B_trans ^ B.
row_major();
 
  667     char cAt = effective_A_trans ? 
'T' : 
'N';
 
  668     char cBt = effective_B_trans ? 
'T' : 
'N';
 
  670     std::string kernel_prefix(
"prod_");
 
  695 template<
typename NumericT, 
typename ScalarT1>
 
  697                           ScalarT1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
  714                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<NumericT>(alpha)),
 
  717                            viennacl::traits::opencl_handle(vec1),
 
  722                            viennacl::traits::opencl_handle(vec2),
 
  731 template <
typename SCALARTYPE, 
typename VectorType>
 
  747                                 static_cast<cl_uint>(A.
size1()),
 
  748                                 static_cast<cl_uint>(A.
size2()),
 
  757 template <
typename NumericT>
 
  796 template <
typename NumericT>
 
  810                                       static_cast<cl_uint>(start + 1),
 
  811                                       static_cast<cl_uint>(start),
 
  825                                       static_cast<cl_uint>(start + 1),
 
  826                                       static_cast<cl_uint>(start),
 
  839 template <
typename NumericT>
 
  853                                       static_cast<cl_uint>(0),
 
  854                                       static_cast<cl_uint>(0),
 
  869                                       static_cast<cl_uint>(0),
 
  870                                       static_cast<cl_uint>(0),
 
  883 template <
typename NumericT>
 
  921 template<
typename NumericT>
 
  944                                       static_cast<cl_uint>(l),
 
  945                                       static_cast<cl_uint>(m - 1)
 
  961                                       static_cast<cl_uint>(l),
 
  962                                       static_cast<cl_uint>(m - 1)
 
  969   template <
typename NumericT>
 
  988                                       static_cast<cl_uint>(row_start),
 
  989                                       static_cast<cl_uint>(col_start),
 
 1003                                       static_cast<cl_uint>(row_start),
 
 1004                                       static_cast<cl_uint>(col_start),
 
cl_uint stride
Increment between integers. 
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
Represents an OpenCL device within ViennaCL. 
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void prod_impl(const matrix_base< NumericT > &mat, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication. 
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
Generic size and resize functionality for different vector and matrix types. 
Represents an OpenCL kernel within ViennaCL. 
viennacl::ocl::program & get_program(std::string const &name)
Returns the program with the provided name. 
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
cl_uint start
Starting value of the integer stride. 
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const 
Returns the local work size at the respective dimension. 
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
const std::string SVD_BIDIAG_PACK_KERNEL
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
size_type stride2() const 
Returns the column stride, i.e. the increment between consecutive column indices (not the number of columns; see size2()). 
const std::string SVD_GIVENS_NEXT_KERNEL
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL. 
result_of::size_type< T >::type start1(T const &obj)
static void init(viennacl::ocl::context &ctx)
Determines row and column increments for matrices and matrix proxies. 
void bidiag_pack(matrix_base< NumericT > &A, viennacl::vector< NumericT > &dh, viennacl::vector< NumericT > &sh)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector. 
viennacl::ocl::kernel & element_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
void scaled_rank_1_update(matrix_base< NumericT > &A, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
OpenCL kernel file for singular value decomposition. 
const std::string SVD_COPY_ROW_KERNEL
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding. 
Common implementations shared by OpenCL-based operations. 
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
Main kernel class for generating OpenCL kernels for elementwise-operations such as element_sin() on/w...
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
static device_specific::execution_handler & execution_handler(bool is_row_major, viennacl::ocl::context &ctx)
viennacl::ocl::kernel & kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument. 
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
OpenCL kernel file for vector operations. 
Implementation of a smart-pointer-like class for handling OpenCL handles. 
result_of::size_type< T >::type start(T const &obj)
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
size_type stride1() const 
Returns the row stride, i.e. the increment between consecutive row indices (not the number of rows; see size1()). 
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
size_type size2() const 
Returns the number of columns. 
void bidiag_pack_svd(viennacl::matrix< SCALARTYPE > &A, VectorType &dh, VectorType &sh)
Wrapper class for an OpenCL program. 
void execute(template_base const &T, statements_container const &statements, viennacl::ocl::context &ctx=viennacl::ocl::current_context(), bool force_compilation=false)
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C) 
size_type size1() const 
Returns the number of rows. 
Proxy classes for vectors. 
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
statement mat_mat_prod(NumericT alpha, viennacl::matrix_base< NumericT > const *A, bool A_trans, viennacl::matrix_base< NumericT > const *B, bool B_trans, NumericT beta, viennacl::matrix_base< NumericT > const *C)
Main kernel class for generating OpenCL kernels for operations on/with dense matrix objects of type v...
viennacl::context context(T const &t)
Returns the viennacl::context describing the currently active memory domain of an object. 
viennacl::ocl::kernel & legacy_kernel_for_matrix(matrix_base< NumericT > const &M, std::string const &kernel_name)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
Representation of an OpenCL kernel in ViennaCL. 
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
size_type global_work_size(int index=0) const 
Returns the global work size at the respective dimension. 
void givens_next(matrix_base< NumericT > &matrix, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
A tag class representing transposed matrices. 
size_type start2() const 
Returns the starting column index, i.e. the column offset (not the number of columns; see size2()). 
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size2() const 
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
The main class for representing a statement such as x = inner_prod(y,z); at runtime. 
const std::string SVD_COPY_COL_KERNEL
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
size_type internal_size1() const 
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::ocl::kernel & get_kernel(std::string const &name)
Returns the kernel with the provided name. 
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
std::string op_to_string(op_abs)
static void init(viennacl::ocl::context &ctx)
Helper class for converting a type to its string representation. 
OpenCL kernel file for element-wise matrix operations. 
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Implementation of the ViennaCL scalar class. 
A collection of compile time type deductions. 
static std::string program_name()
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Simple enable-if variant that uses the SFINAE pattern. 
size_type start1() const 
Returns the starting row index, i.e. the row offset (not the number of rows; see size1()). 
cl_uint size
Number of values in the stride. 
Runtime generation of OpenCL kernels for matrix operations. 
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void matrix_row(matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)