1 #ifndef VIENNACL_LINALG_CUDA_MATRIX_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_CUDA_MATRIX_OPERATIONS_HPP_ 
   56 template<
typename DestNumericT, 
typename SrcNumericT>
 
   59   assert(mat1.
row_major() == mat2.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
 
   93 template<
typename NumericT, 
typename SizeT, 
typename DistanceT>
 
   98                             static_cast<unsigned int>(proxy.lhs().start1()),          static_cast<unsigned int>(proxy.lhs().start2()),
 
   99                             static_cast<unsigned int>(proxy.lhs().internal_size1()),  static_cast<unsigned int>(proxy.lhs().internal_size2()),
 
  100                             static_cast<unsigned int>(proxy.lhs().size1()),           static_cast<unsigned int>(proxy.lhs().size2()),
 
  101                             static_cast<unsigned int>(proxy.lhs().stride1()),         static_cast<unsigned int>(proxy.lhs().stride2()),
 
  104                             static_cast<unsigned int>(temp_trans.
start1()),            static_cast<unsigned int>(temp_trans.
start2()),
 
  106                             static_cast<unsigned int>(temp_trans.
stride1()),           static_cast<unsigned int>(temp_trans.
stride2()),
 
  107                             static_cast<bool>(proxy.lhs().row_major()));
 
  112 template<
typename NumericT, 
typename ScalarT>
 
  116   assert(mat1.
row_major() == mat2.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
 
  120   unsigned int options_alpha = 
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
 
  122   value_type temporary_alpha = 0;
 
  124     temporary_alpha = alpha;
 
  163 template<
typename NumericT, 
typename ScalarT1, 
typename ScalarT2>
 
  172   unsigned int options_alpha = 
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
 
  174   value_type temporary_alpha = 0;
 
  176     temporary_alpha = alpha;
 
  181   value_type temporary_beta = 0;
 
  183     temporary_beta = beta;
 
  238 template<
typename NumericT, 
typename ScalarT1, 
typename ScalarT2>
 
  247   unsigned int options_alpha = 
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
 
  249   value_type temporary_alpha = 0;
 
  251     temporary_alpha = alpha;
 
  256   value_type temporary_beta = 0;
 
  258     temporary_beta = beta;
 
  315 template<
typename NumericT>
 
  319   value_type alpha = s;
 
  331                                            static_cast<unsigned int>(viennacl::traits::internal_size1(mat)),   static_cast<unsigned int>(viennacl::traits::internal_size2(mat)),
 
  341                                             static_cast<unsigned int>(viennacl::traits::internal_size1(mat)),   static_cast<unsigned int>(viennacl::traits::internal_size2(mat)),
 
  347 template<
typename NumericT>
 
  351   value_type alpha = s;
 
  376 template<
typename NumericT>
 
  385   unsigned int options_alpha = 0;
 
  416                           static_cast<unsigned int>(mat_start),
 
  417                           static_cast<unsigned int>(mat_stride),
 
  418                           static_cast<unsigned int>(mat_size),
 
  420                           viennacl::cuda_arg<value_type>(
NumericT(1)),
 
  428 template<
typename NumericT>
 
  433   unsigned int options_alpha = 0;
 
  467                           viennacl::cuda_arg<value_type>(
NumericT(1)),
 
  470                           static_cast<unsigned int>(mat_start),
 
  471                           static_cast<unsigned int>(mat_stride));
 
  475 template<
typename NumericT>
 
  480   unsigned int options_alpha = 0;
 
  500                           viennacl::cuda_arg<value_type>(
NumericT(1)),
 
  503                           static_cast<unsigned int>(mat_start),
 
  504                           static_cast<unsigned int>(mat_stride));
 
  508 template<
typename NumericT>
 
  513   unsigned int options_alpha = 0;
 
  533                           viennacl::cuda_arg<value_type>(
NumericT(1)),
 
  536                           static_cast<unsigned int>(mat_start),
 
  537                           static_cast<unsigned int>(mat_stride));
 
  547 template<
typename NumericT, 
typename SizeT, 
typename OpT>
 
  551   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  555   unsigned int op_type = 2; 
 
  607 template<
typename SizeT, 
typename OpT>
 
  611   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  613   typedef float        value_type;
 
  615   unsigned int op_type = 2; 
 
  667 template<
typename SizeT, 
typename OpT>
 
  671   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  673   typedef double        value_type;
 
  675   unsigned int op_type = 2; 
 
  735 template<
typename NumericT>
 
  739   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  777 template<
typename NumericT>
 
  781   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  819 template<
typename NumericT>
 
  823   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  861 template<
typename NumericT>
 
  865   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  903 template<
typename NumericT>
 
  907   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  945 template<
typename NumericT>
 
  949   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
  987 template<
typename NumericT>
 
  991   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1029 template<
typename NumericT>
 
 1033   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1071 template<
typename NumericT>
 
 1075   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1113 template<
typename NumericT>
 
 1117   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1155 template<
typename NumericT>
 
 1159   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1197 template<
typename NumericT>
 
 1201   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1239 template<
typename NumericT>
 
 1243   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1281 template<
typename NumericT>
 
 1285   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1323 template<
typename NumericT>
 
 1327   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1365 template<
typename NumericT>
 
 1369   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1407 template<
typename NumericT>
 
 1411   assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
 
 1463 template<
typename NumericT>
 
 1568   template<
typename MatrixT1, 
typename MatrixT2, 
typename MatrixT3, 
typename ScalarT>
 
 1570                         const MatrixT2 & B, 
bool transposed_B,
 
 1577     cpu_value_type converted_alpha = 
static_cast<cpu_value_type
>(alpha);
 
 1578     cpu_value_type converted_beta  = 
static_cast<cpu_value_type
>(beta);
 
 1580     dim3 threads(16, 16);
 
 1584     bool row_major_A = A.row_major();
 
 1585     bool row_major_B = B.row_major();
 
 1586     bool row_major_C = C.row_major();
 
 1589     if (!row_major_C && !row_major_A && !row_major_B && !transposed_A && !transposed_B)
 
 1591       matrix_matrix_col_col_col_prod_AA_kernel<<<grid, threads>>>
 
 1612     else if (!row_major_C && !row_major_A && !row_major_B && !transposed_A && transposed_B)
 
 1614       matrix_matrix_col_col_col_prod_AT_kernel<<<grid, threads>>>
 
 1635     else if (!row_major_C && !row_major_A && !row_major_B && transposed_A && !transposed_B)
 
 1637       matrix_matrix_col_col_col_prod_TA_kernel<<<grid, threads>>>
 
 1658     else if (!row_major_C && !row_major_A && !row_major_B && transposed_A && transposed_B)
 
 1660       matrix_matrix_col_col_col_prod_TT_kernel<<<grid, threads>>>
 
 1683     else if (!row_major_C && !row_major_A && row_major_B && !transposed_A && !transposed_B)
 
 1685       matrix_matrix_col_col_row_prod_AA_kernel<<<grid, threads>>>
 
 1706     else if (!row_major_C && !row_major_A && row_major_B && !transposed_A && transposed_B)
 
 1708       matrix_matrix_col_col_row_prod_AT_kernel<<<grid, threads>>>
 
 1729     else if (!row_major_C && !row_major_A && row_major_B && transposed_A && !transposed_B)
 
 1731       matrix_matrix_col_col_row_prod_TA_kernel<<<grid, threads>>>
 
 1752     else if (!row_major_C && !row_major_A && row_major_B && transposed_A && transposed_B)
 
 1754       matrix_matrix_col_col_row_prod_TT_kernel<<<grid, threads>>>
 
 1777     else if (!row_major_C && row_major_A && !row_major_B && !transposed_A && !transposed_B)
 
 1779       matrix_matrix_col_row_col_prod_AA_kernel<<<grid, threads>>>
 
 1800     else if (!row_major_C && row_major_A && !row_major_B && !transposed_A && transposed_B)
 
 1802       matrix_matrix_col_row_col_prod_AT_kernel<<<grid, threads>>>
 
 1823     else if (!row_major_C && row_major_A && !row_major_B && transposed_A && !transposed_B)
 
 1825       matrix_matrix_col_row_col_prod_TA_kernel<<<grid, threads>>>
 
 1846     else if (!row_major_C && row_major_A && !row_major_B && transposed_A && transposed_B)
 
 1848       matrix_matrix_col_row_col_prod_TT_kernel<<<grid, threads>>>
 
 1871     else if (!row_major_C && row_major_A && row_major_B && !transposed_A && !transposed_B)
 
 1873       matrix_matrix_col_row_row_prod_AA_kernel<<<grid, threads>>>
 
 1894     else if (!row_major_C && row_major_A && row_major_B && !transposed_A && transposed_B)
 
 1896       matrix_matrix_col_row_row_prod_AT_kernel<<<grid, threads>>>
 
 1917     else if (!row_major_C && row_major_A && row_major_B && transposed_A && !transposed_B)
 
 1919       matrix_matrix_col_row_row_prod_TA_kernel<<<grid, threads>>>
 
 1940     else if (!row_major_C && row_major_A && row_major_B && transposed_A && transposed_B)
 
 1942       matrix_matrix_col_row_row_prod_TT_kernel<<<grid, threads>>>
 
 1965     else if (row_major_C && !row_major_A && !row_major_B && !transposed_A && !transposed_B)
 
 1967       matrix_matrix_row_col_col_prod_AA_kernel<<<grid, threads>>>
 
 1988     else if (row_major_C && !row_major_A && !row_major_B && !transposed_A && transposed_B)
 
 1990       matrix_matrix_row_col_col_prod_AT_kernel<<<grid, threads>>>
 
 2011     else if (row_major_C && !row_major_A && !row_major_B && transposed_A && !transposed_B)
 
 2013       matrix_matrix_row_col_col_prod_TA_kernel<<<grid, threads>>>
 
 2034     else if (row_major_C && !row_major_A && !row_major_B && transposed_A && transposed_B)
 
 2036       matrix_matrix_row_col_col_prod_TT_kernel<<<grid, threads>>>
 
 2059     else if (row_major_C && !row_major_A && row_major_B && !transposed_A && !transposed_B)
 
 2061       matrix_matrix_row_col_row_prod_AA_kernel<<<grid, threads>>>
 
 2082     else if (row_major_C && !row_major_A && row_major_B && !transposed_A && transposed_B)
 
 2084       matrix_matrix_row_col_row_prod_AT_kernel<<<grid, threads>>>
 
 2105     else if (row_major_C && !row_major_A && row_major_B && transposed_A && !transposed_B)
 
 2107       matrix_matrix_row_col_row_prod_TA_kernel<<<grid, threads>>>
 
 2128     else if (row_major_C && !row_major_A && row_major_B && transposed_A && transposed_B)
 
 2130       matrix_matrix_row_col_row_prod_TT_kernel<<<grid, threads>>>
 
 2153     else if (row_major_C && row_major_A && !row_major_B && !transposed_A && !transposed_B)
 
 2155       matrix_matrix_row_row_col_prod_AA_kernel<<<grid, threads>>>
 
 2176     else if (row_major_C && row_major_A && !row_major_B && !transposed_A && transposed_B)
 
 2178       matrix_matrix_row_row_col_prod_AT_kernel<<<grid, threads>>>
 
 2199     else if (row_major_C && row_major_A && !row_major_B && transposed_A && !transposed_B)
 
 2201       matrix_matrix_row_row_col_prod_TA_kernel<<<grid, threads>>>
 
 2222     else if (row_major_C && row_major_A && !row_major_B && transposed_A && transposed_B)
 
 2224       matrix_matrix_row_row_col_prod_TT_kernel<<<grid, threads>>>
 
 2249     else if (row_major_C && row_major_A && row_major_B && !transposed_A && !transposed_B)
 
 2251       matrix_matrix_row_row_row_prod_AA_kernel<<<grid, threads>>>
 
 2272     else if (row_major_C && row_major_A && row_major_B && !transposed_A && transposed_B)
 
 2274       matrix_matrix_row_row_row_prod_AT_kernel<<<grid, threads>>>
 
 2295     else if (row_major_C && row_major_A && row_major_B && transposed_A && !transposed_B)
 
 2297       matrix_matrix_row_row_row_prod_TA_kernel<<<grid, threads>>>
 
 2318     else if (row_major_C && row_major_A && row_major_B && transposed_A && transposed_B)
 
 2320       matrix_matrix_row_row_row_prod_TT_kernel<<<grid, threads>>>
 
 2345   template<
typename MatrixT1, 
typename MatrixT2, 
typename MatrixT3, 
typename ScalarT>
 
 2346   void prod(
const MatrixT1 & A, 
bool transposed_A,
 
 2347             const MatrixT2 & B, 
bool transposed_B,
 
 2383 template<
typename NumericT, 
typename ScalarT>
 
 2415 template<
typename NumericT, 
typename ScalarT>
 
 2417                           ScalarT 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
 2426   unsigned int options_alpha = 
detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
 
 2428   value_type temporary_alpha = 0;
 
 2430     temporary_alpha = alpha;
 
 2488 template <
typename NumericT, 
typename VectorType>
 
 2496       viennacl::linalg::cuda::bidiag_pack_row_major_kernel<<<128, 128>>>(
viennacl::cuda_arg(A),
 
 2505       viennacl::linalg::cuda::bidiag_pack_column_major_kernel<<<128, 128>>>(
viennacl::cuda_arg(A),
 
 2525 template <
typename NumericT>
 
 2539                                         static_cast<unsigned int>(row_start),
 
 2540                                         static_cast<unsigned int>(col_start),
 
 2548                                         static_cast<unsigned int>(row_start),
 
 2549                                         static_cast<unsigned int>(col_start),
 
 2562                                         static_cast<unsigned int>(row_start),
 
 2563                                         static_cast<unsigned int>(col_start),
 
 2571                                         static_cast<unsigned int>(row_start),
 
 2572                                         static_cast<unsigned int>(col_start),
 
 2586 template <
typename NumericT>
 
 2595                                                static_cast<unsigned int>(start + 1),
 
 2596                                                static_cast<unsigned int>(start),
 
 2607                                                static_cast<unsigned int>(start + 1),
 
 2608                                                static_cast<unsigned int>(start),
 
 2625 template <
typename NumericT>
 
 2633                                                 static_cast<unsigned int>(0),
 
 2634                                                 static_cast<unsigned int>(0),
 
 2645                                                 static_cast<unsigned int>(0),
 
 2646                                                 static_cast<unsigned int>(0),
 
 2662 template <
typename NumericT>
 
 2672                                            static_cast<unsigned int>(A_size1),
 
 2679                                            static_cast<unsigned int>(A_size1),
 
 2693 template<
typename NumericT>
 
 2706                                      static_cast<unsigned int>(l),
 
 2707                                      static_cast<unsigned int>(m - 1));
 
 2715                                      static_cast<unsigned int>(l),
 
 2716                                      static_cast<unsigned int>(m - 1));
 
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors. 
unsigned int make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Generic size and resize functionality for different vector and matrix types. 
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Implementations of row-major dense matrix related operations, including matrix-vector products...
size_type stride2() const 
Returns the stride (increment between consecutive entries) in the column direction. 
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL. 
result_of::size_type< T >::type start1(T const &obj)
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Determines row and column increments for matrices and matrix proxies. 
Implementations of column-major dense matrix related operations, including matrix-vector products...
viennacl::scalar< int > s2
viennacl::scalar< float > s1
void prod_impl(const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication. 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
void prod_slow_kernel(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
result_of::size_type< T >::type start2(T const &obj)
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void matrix_diag_to_vector(matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec)
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
result_of::size_type< T >::type start(T const &obj)
void scaled_rank_1_update(matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
size_type stride1() const 
Returns the stride (increment between consecutive entries) in the row direction. 
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
Dense matrix-matrix product CUDA kernels reside here. 
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors. 
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
void element_op(matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
Common routines for CUDA execution. 
void matrix_row(matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec)
__global__ void givens_next_row_major_kernel(T *matr, T *cs, T *ss, unsigned int size, unsigned int stride, unsigned int start_i, unsigned int end_i)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
This function stores the diagonal and the superdiagonal of a matrix in two vectors. 
A tag class representing transposed matrices. 
size_type start2() const 
Returns the start index (offset) in the column direction. 
#define VIENNACL_CUDA_LAST_ERROR_CHECK(message)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size2() const 
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version. 
size_type internal_size1() const 
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
void givens_next(matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
This function updates the matrix Q. It is part of the tql2 algorithm. 
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Implementation of the ViennaCL scalar class. 
Implementations of NMF operations using CUDA. 
A collection of compile time type deductions. 
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
viennacl::backend::mem_handle::cuda_handle_type & arg_reference(viennacl::scalar< NumericT > &s, OtherT)
void copy_vec(matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
This function copies a row or a column from a matrix to a vector. 
Simple enable-if variant that uses the SFINAE pattern. 
size_type start1() const 
Returns the start index (offset) in the row direction.