1 #ifndef VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_VECTOR_OPERATIONS_HPP_ 
   38 #ifdef VIENNACL_WITH_OPENCL 
   42 #ifdef VIENNACL_WITH_CUDA 
   50     template<
typename DestNumericT, 
typename SrcNumericT>
 
   60 #ifdef VIENNACL_WITH_OPENCL 
   65 #ifdef VIENNACL_WITH_CUDA 
   77     template<
typename T, 
typename ScalarType1>
 
   79             vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha)
 
   88 #ifdef VIENNACL_WITH_OPENCL 
   93 #ifdef VIENNACL_WITH_CUDA 
  106     template<
typename T, 
typename ScalarType1, 
typename ScalarType2>
 
  108               vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
  109               vector_base<T> const & vec3, ScalarType2 
const & beta,  
vcl_size_t len_beta,  
bool reciprocal_beta,  
bool flip_sign_beta)
 
  118                                                   vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  119                                                   vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  121 #ifdef VIENNACL_WITH_OPENCL 
  124                                          vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  125                                          vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  128 #ifdef VIENNACL_WITH_CUDA 
  131                                        vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  132                                        vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  143     template<
typename T, 
typename ScalarType1, 
typename ScalarType2>
 
  145                 vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
  146                 vector_base<T> const & vec3, ScalarType2 
const & beta,  
vcl_size_t len_beta,  
bool reciprocal_beta,  
bool flip_sign_beta)
 
  155                                                     vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  156                                                     vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  158 #ifdef VIENNACL_WITH_OPENCL 
  161                                            vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  162                                            vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  165 #ifdef VIENNACL_WITH_CUDA 
  168                                          vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
 
  169                                          vec3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
 
  194 #ifdef VIENNACL_WITH_OPENCL 
  199 #ifdef VIENNACL_WITH_CUDA 
  227 #ifdef VIENNACL_WITH_OPENCL 
  232 #ifdef VIENNACL_WITH_CUDA 
  254     template<
typename T, 
typename OP>
 
  265 #ifdef VIENNACL_WITH_OPENCL 
  270 #ifdef VIENNACL_WITH_CUDA 
  285 #define VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(OPNAME) \ 
  286     template<typename T> \ 
  287     viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> > \ 
  288     element_##OPNAME(vector_base<T> const & v1, vector_base<T> const & v2) \ 
  290       return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_##OPNAME> >(v1, v2); \ 
  293     template<typename V1, typename V2, typename OP, typename T> \ 
  294     viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> > \ 
  295     element_##OPNAME(vector_expression<const V1, const V2, OP> const & proxy, vector_base<T> const & v2) \ 
  297       return viennacl::vector_expression<const vector_expression<const V1, const V2, OP>, const vector_base<T>, op_element_binary<op_##OPNAME> >(proxy, v2); \ 
  300     template<typename T, typename V2, typename V3, typename OP> \ 
  301     viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> > \ 
  302     element_##OPNAME(vector_base<T> const & v1, vector_expression<const V2, const V3, OP> const & proxy) \ 
  304       return viennacl::vector_expression<const vector_base<T>, const vector_expression<const V2, const V3, OP>, op_element_binary<op_##OPNAME> >(v1, proxy); \ 
  307     template<typename V1, typename V2, typename OP1, \ 
  308               typename V3, typename V4, typename OP2> \ 
  309     viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \ 
  310                                 const vector_expression<const V3, const V4, OP2>, \ 
  311                                 op_element_binary<op_##OPNAME> > \ 
  312     element_##OPNAME(vector_expression<const V1, const V2, OP1> const & proxy1, \ 
  313                      vector_expression<const V3, const V4, OP2> const & proxy2) \ 
  315       return viennacl::vector_expression<const vector_expression<const V1, const V2, OP1>, \ 
  316                                          const vector_expression<const V3, const V4, OP2>, \ 
  317                                          op_element_binary<op_##OPNAME> >(proxy1, proxy2); \ 
  320     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(
prod)  
 
  321     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(div)   
 
  322     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(pow)   
 
  324     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(eq)
 
  325     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(neq)
 
  326     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(greater)
 
  327     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(less)
 
  328     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(geq)
 
  329     VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(leq)
 
  331 #undef VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS 
  334 #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \ 
  335     template<typename T> \ 
  336     viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> > \ 
  337     element_##funcname(vector_base<T> const & v) \ 
  339       return viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<op_##funcname> >(v, v); \ 
  341     template<typename LHS, typename RHS, typename OP> \ 
  342     viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \ 
  343                                 const vector_expression<const LHS, const RHS, OP>, \ 
  344                                 op_element_unary<op_##funcname> > \ 
  345     element_##funcname(vector_expression<const LHS, const RHS, OP> const & proxy) \ 
  347       return viennacl::vector_expression<const vector_expression<const LHS, const RHS, OP>, \ 
  348                                          const vector_expression<const LHS, const RHS, OP>, \ 
  349                                          op_element_unary<op_##funcname> >(proxy, proxy); \ 
  370 #undef VIENNACL_MAKE_UNARY_ELEMENT_OP 
  391       assert( vec1.
size() == vec2.
size() && bool(
"Size mismatch") );
 
  398 #ifdef VIENNACL_WITH_OPENCL 
  403 #ifdef VIENNACL_WITH_CUDA 
  416     template<
typename LHS, 
typename RHS, 
typename OP, 
typename T>
 
  427     template<
typename T, 
typename LHS, 
typename RHS, 
typename OP>
 
  438     template<
typename LHS1, 
typename RHS1, 
typename OP1,
 
  439               typename LHS2, 
typename RHS2, 
typename OP2, 
typename T>
 
  463       assert( vec1.
size() == vec2.
size() && bool(
"Size mismatch") );
 
  470 #ifdef VIENNACL_WITH_OPENCL 
  475 #ifdef VIENNACL_WITH_CUDA 
  488     template<
typename LHS, 
typename RHS, 
typename OP, 
typename T>
 
  499     template<
typename T, 
typename LHS, 
typename RHS, 
typename OP>
 
  510     template<
typename LHS1, 
typename RHS1, 
typename OP1,
 
  511               typename LHS2, 
typename RHS2, 
typename OP2, 
typename S3>
 
  534       assert( x.
size() == y_tuple.
const_at(0).size() && bool(
"Size mismatch") );
 
  535       assert( result.
size() == y_tuple.
const_size() && bool(
"Number of elements does not match result size") );
 
  542 #ifdef VIENNACL_WITH_OPENCL 
  547 #ifdef VIENNACL_WITH_CUDA 
  574 #ifdef VIENNACL_WITH_OPENCL 
  579 #ifdef VIENNACL_WITH_CUDA 
  597     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
  621 #ifdef VIENNACL_WITH_OPENCL 
  626 #ifdef VIENNACL_WITH_CUDA 
  643     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
  668 #ifdef VIENNACL_WITH_OPENCL 
  673 #ifdef VIENNACL_WITH_CUDA 
  690     template<
typename LHS, 
typename RHS, 
typename OP, 
typename T>
 
  713 #ifdef VIENNACL_WITH_OPENCL 
  718 #ifdef VIENNACL_WITH_CUDA 
  735     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
  760 #ifdef VIENNACL_WITH_OPENCL 
  765 #ifdef VIENNACL_WITH_CUDA 
  782     template<
typename LHS, 
typename RHS, 
typename OP, 
typename T>
 
  805 #ifdef VIENNACL_WITH_OPENCL 
  810 #ifdef VIENNACL_WITH_CUDA 
  827     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
  851 #ifdef VIENNACL_WITH_OPENCL 
  855 #ifdef VIENNACL_WITH_CUDA 
  870     template<
typename LHS, 
typename RHS, 
typename OP>
 
  884     template<
typename NumericT>
 
  892 #ifdef VIENNACL_WITH_OPENCL 
  897 #ifdef VIENNACL_WITH_CUDA 
  914     template<
typename LHS, 
typename RHS, 
typename OP, 
typename NumericT>
 
  935 #ifdef VIENNACL_WITH_OPENCL 
  940 #ifdef VIENNACL_WITH_CUDA 
  957     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
  971     template<
typename NumericT>
 
  979 #ifdef VIENNACL_WITH_OPENCL 
  984 #ifdef VIENNACL_WITH_CUDA 
 1001     template<
typename LHS, 
typename RHS, 
typename OP, 
typename NumericT>
 
 1014     template<
typename T>
 
 1022 #ifdef VIENNACL_WITH_OPENCL 
 1027 #ifdef VIENNACL_WITH_CUDA 
 1044     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
 1058     template<
typename NumericT>
 
 1066 #ifdef VIENNACL_WITH_OPENCL 
 1071 #ifdef VIENNACL_WITH_CUDA 
 1088     template<
typename LHS, 
typename RHS, 
typename OP, 
typename NumericT>
 
 1101     template<
typename T>
 
 1109 #ifdef VIENNACL_WITH_OPENCL 
 1114 #ifdef VIENNACL_WITH_CUDA 
 1131     template<
typename LHS, 
typename RHS, 
typename OP, 
typename S2>
 
 1151     template<
typename T>
 
 1161 #ifdef VIENNACL_WITH_OPENCL 
 1166 #ifdef VIENNACL_WITH_CUDA 
 1189     template<
typename NumericT>
 
 1198   #ifdef VIENNACL_WITH_OPENCL 
 1204   #ifdef VIENNACL_WITH_CUDA 
 1222     template<
typename NumericT>
 
 1239     template<
typename NumericT>
 
 1248   #ifdef VIENNACL_WITH_OPENCL 
 1254   #ifdef VIENNACL_WITH_CUDA 
 1272     template<
typename NumericT>
 
 1279   template<
typename T, 
typename LHS, 
typename RHS, 
typename OP>
 
 1283     assert( (v1.
size() > 0) && 
bool(
"Vector not yet initialized!") );
 
 1290   template<
typename T, 
typename LHS, 
typename RHS, 
typename OP>
 
 1294     assert( (v1.
size() > 0) && 
bool(
"Vector not yet initialized!") );
 
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector, where the result is stored on a CPU scalar. 
vcl_size_t const_size() const 
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
void vector_assign(vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_2_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^2-norm of a vector - implementation. 
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP. 
Worker class for decomposing expression templates. 
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors with the final reduction step on the CPU - dispatcher inter...
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector. 
void min_cpu(vector_base< T > const &vec, T &result)
Computes the minimum of a vector with final reduction on the CPU. 
vector< NumericT > operator-=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 -= A * v2, where A is a matrix. 
Implementations of NMF operations using OpenCL. 
Exception class in case of memory errors. 
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on CPU. 
void max_cpu(vector_base< T > const &vec, T &result)
Computes the maximum of a vector with final reduction on the CPU. 
Generic size and resize functionality for different vector and matrix types. 
Defines the worker class for decomposing an expression tree into small chunks, which can be processed...
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector. 
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors. 
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the sum of all elements from the vector. 
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vector< NumericT > operator+=(vector_base< NumericT > &v1, const viennacl::vector_expression< const matrix_base< NumericT >, const vector_base< NumericT >, viennacl::op_prod > &proxy)
Implementation of the operation v1 += A * v2, where A is a matrix. 
void sum_impl(vector_base< T > const &vec, scalar< T > &result)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors. 
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on the CPU - dispatcher interface. 
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector. 
void max_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the maximum of a vector, both reduction stages run on the GPU. 
This file provides the forward declarations for the main types used within ViennaCL. 
Determines row and column increments for matrices and matrix proxies. 
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA. 
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector, where the result is stored in an OpenCL buffer. ...
void norm_1_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the l^1-norm of a vector. 
An expression template class that represents a binary operation that yields a vector. 
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
A tag class representing inplace addition. 
void max_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU...
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied. 
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - dispatcher interface. 
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
viennacl::vector< float > v1
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
void max_impl(vector_base< T > const &vec, scalar< T > &result)
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, vector_base< T > &partial_result)
Computes the partial inner product of two vectors - implementation. Library users should call inner_p...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector. 
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA. 
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation. 
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum value of a vector, where the result is stored in an OpenCL buffer. 
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, scalar< T > &result)
Computes the inner product of two vectors - dispatcher interface. 
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum value of a vector, where the value is stored in a host value. 
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on the CPU. 
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on CPU. 
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector. 
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied. 
#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname)
void norm_inf_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the supremum-norm of a vector. 
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
void inner_prod_cpu(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied. 
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector. 
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector - dispatcher interface. 
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector. 
void sum_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the sum of a vector, first reduction stage on the GPU, second stage on the CPU...
Common base class for dense vectors, vector ranges, and vector slices. 
void sum_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the sum of a vector, both reduction stages run on the GPU. 
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void min_impl(vector_base< T > const &vec, scalar< T > &result)
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using CUDA. 
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP. 
void min_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU...
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C) 
void norm_1_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^1-norm of a vector. 
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void inclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan. 
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors. 
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
void min_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the minimum of a vector, both reduction stages run on the GPU. 
A tag class representing inplace subtraction. 
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector. 
void sum_cpu(vector_base< T > const &vec, T &result)
Computes the sum of a vector with final reduction on the CPU. 
void element_op(matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy)
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using CUDA. 
void norm_inf_cpu(vector_base< NumericT > const &vec1, NumericT &result)
Computes the supremum-norm of a vector. 
cl_uint index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
size_type size() const 
Returns the length of the vector (cf. std::vector) 
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector. 
VectorType const & const_at(vcl_size_t i) const 
Implementation of a range object for use with proxy objects. 
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan. 
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied. 
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors. 
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Implementation of the ViennaCL scalar class. 
Implementations of NMF operations using CUDA. 
void norm_2_impl(vector_base< NumericT > const &vec1, scalar< NumericT > &result)
Computes the l^2-norm of a vector - implementation. 
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector with final reduction on the CPU. 
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector. 
Simple enable-if variant that uses the SFINAE pattern. 
memory_types get_active_handle_id() const 
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Implementations of NMF operations using a plain single-threaded or OpenMP-enabled execution on CPU...