1 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_ 
   55 template<
typename DestNumericT, 
typename SrcNumericT>
 
   58   assert(viennacl::traits::opencl_handle(dest).
context() == viennacl::traits::opencl_handle(src).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
   60   std::string kernel_name(
"convert_");
 
   75 template <
typename T, 
typename ScalarType1>
 
   77         vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha)
 
   79   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  107                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
 
  109                            viennacl::traits::opencl_handle(vec2),
 
  115 template <
typename T, 
typename ScalarType1, 
typename ScalarType2>
 
  117           vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
  118           vector_base<T> const & vec3, ScalarType2 
const & beta,  
vcl_size_t len_beta,  
bool reciprocal_beta,  
bool flip_sign_beta)
 
  120   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  121   assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  126   std::string kernel_name;
 
  128     kernel_name = 
"avbv_cpu_cpu";
 
  130     kernel_name = 
"avbv_cpu_gpu";
 
  132     kernel_name = 
"avbv_gpu_cpu";
 
  134     kernel_name = 
"avbv_gpu_gpu";
 
  164                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
 
  166                            viennacl::traits::opencl_handle(vec2),
 
  169                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
 
  171                            viennacl::traits::opencl_handle(vec3),
 
  177 template <
typename T, 
typename ScalarType1, 
typename ScalarType2>
 
  179             vector_base<T> const & vec2, ScalarType1 
const & alpha, 
vcl_size_t len_alpha, 
bool reciprocal_alpha, 
bool flip_sign_alpha,
 
  180             vector_base<T> const & vec3, ScalarType2 
const & beta,  
vcl_size_t len_beta,  
bool reciprocal_beta,  
bool flip_sign_beta)
 
  182   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  183   assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(vec3).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  188   std::string kernel_name;
 
  190     kernel_name = 
"avbv_v_cpu_cpu";
 
  192     kernel_name = 
"avbv_v_cpu_gpu";
 
  194     kernel_name = 
"avbv_v_gpu_cpu";
 
  196     kernel_name = 
"avbv_v_gpu_gpu";
 
  226                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)),
 
  228                            viennacl::traits::opencl_handle(vec2),
 
  231                            viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)),
 
  233                            viennacl::traits::opencl_handle(vec3),
 
  245 template <
typename T>
 
  261                            viennacl::traits::opencl_handle(T(alpha)) )
 
  271 template <
typename T>
 
  274   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  285                            viennacl::traits::opencl_handle(vec2),
 
  299 template <
typename T, 
typename OP>
 
  303   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  304   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  309   std::string kernel_name = 
"element_pow";
 
  314     kernel_name = 
"element_div";
 
  319     kernel_name = 
"element_prod";
 
  329                            viennacl::traits::opencl_handle(proxy.lhs()),
 
  333                            viennacl::traits::opencl_handle(proxy.rhs()),
 
  348 template <
typename T, 
typename OP>
 
  352   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.lhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  353   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(proxy.rhs()).
context() && bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  374                            viennacl::traits::opencl_handle(proxy.lhs()),
 
  387 template <
typename T>
 
  392   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  393   assert(viennacl::traits::opencl_handle(vec2).
context() == viennacl::traits::opencl_handle(partial_result).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  399         && 
bool(
"Incompatible vector sizes in inner_prod_impl()!"));
 
  419                            viennacl::traits::opencl_handle(vec2),
 
  422                            viennacl::traits::opencl_handle(partial_result)
 
  436 template <
typename T>
 
  441   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  442   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  448   temp.
resize(work_groups, ctx); 
 
  463                               viennacl::traits::opencl_handle(result) )
 
  469   template<
typename NumericT>
 
  487 template <
typename NumericT>
 
  492   assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  511   while (current_index < vec_tuple.
const_size())
 
  513     switch (vec_tuple.
const_size() - current_index)
 
  530                                                    viennacl::traits::opencl_handle(temp)
 
  535                                     cl_uint(work_groups),
 
  537                                     viennacl::traits::opencl_handle(result),
 
  556                                                     viennacl::traits::opencl_handle(temp)
 
  561                                     cl_uint(work_groups),
 
  563                                     viennacl::traits::opencl_handle(result),
 
  580                                                     viennacl::traits::opencl_handle(temp)
 
  585                                     cl_uint(work_groups),
 
  587                                     viennacl::traits::opencl_handle(result),
 
  602                                                     viennacl::traits::opencl_handle(temp)
 
  607                                     cl_uint(work_groups),
 
  609                                     viennacl::traits::opencl_handle(result),
 
  638                                                     viennacl::traits::opencl_handle(temp)
 
  643                                     cl_uint(work_groups),
 
  645                                     viennacl::traits::opencl_handle(result),
 
  668 template <
typename T>
 
  673   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  679   temp.
resize(work_groups, ctx); 
 
  687   std::vector<T> temp_cpu(work_groups);
 
  691   for (
typename std::vector<T>::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
  704 template <
typename T>
 
  709   assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(partial_result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  724                            viennacl::traits::opencl_handle(partial_result) )
 
  736 template <
typename T>
 
  740   assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  769 template <
typename T>
 
  780   typedef std::vector<typename viennacl::result_of::cl_type<T>::type>  CPUVectorType;
 
  782   CPUVectorType temp_cpu(work_groups);
 
  786   for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
  787     result += static_cast<T>(*it);
 
  800 template <
typename T>
 
  804   assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  833 template <
typename T>
 
  844   typedef std::vector<typename viennacl::result_of::cl_type<T>::type>  CPUVectorType;
 
  846   CPUVectorType temp_cpu(work_groups);
 
  850   for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
  851     result += static_cast<T>(*it);
 
  852   result = std::sqrt(result);
 
  864 template <
typename T>
 
  868   assert(viennacl::traits::opencl_handle(vec).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  897 template <
typename T>
 
  908   typedef std::vector<typename viennacl::result_of::cl_type<T>::type>  CPUVectorType;
 
  910   CPUVectorType temp_cpu(work_groups);
 
  914   for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
  915     result = 
std::max(result, static_cast<T>(*it));
 
  929 template <
typename T>
 
  952   cl_int err = clEnqueueReadBuffer(ctx.
get_queue().
handle().
get(), h.
get(), CL_TRUE, 0, 
sizeof(cl_uint), &result, 0, NULL, NULL);
 
  965 template<
typename NumericT>
 
  969   assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
  985                            viennacl::traits::opencl_handle(temp)
 
  994                            viennacl::traits::opencl_handle(result)
 
 1003 template<
typename NumericT>
 
 1021                            viennacl::traits::opencl_handle(temp)
 
 1025   typedef std::vector<typename viennacl::result_of::cl_type<NumericT>::type>  CPUVectorType;
 
 1027   CPUVectorType temp_cpu(work_groups);
 
 1030   result = 
static_cast<NumericT>(temp_cpu[0]);
 
 1031   for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
 1032     result = 
std::max(result, static_cast<NumericT>(*it));
 
 1044 template<
typename NumericT>
 
 1048   assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
 1064                            viennacl::traits::opencl_handle(temp)
 
 1073                            viennacl::traits::opencl_handle(result)
 
 1082 template<
typename NumericT>
 
 1100                            viennacl::traits::opencl_handle(temp)
 
 1104   typedef std::vector<typename viennacl::result_of::cl_type<NumericT>::type>  CPUVectorType;
 
 1106   CPUVectorType temp_cpu(work_groups);
 
 1109   result = 
static_cast<NumericT>(temp_cpu[0]);
 
 1110   for (
typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it)
 
 1111     result = 
std::min(result, static_cast<NumericT>(*it));
 
 1121 template<
typename NumericT>
 
 1125   assert(viennacl::traits::opencl_handle(x).
context() == viennacl::traits::opencl_handle(result).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
 1136 template<
typename NumericT>
 
 1155 template <
typename T>
 
 1160   assert(viennacl::traits::opencl_handle(vec1).
context() == viennacl::traits::opencl_handle(vec2).
context() && 
bool(
"Operands do not reside in the same OpenCL context. Automatic migration not yet supported!"));
 
 1172                            viennacl::traits::opencl_handle(vec2),
 
 1176                            viennacl::traits::opencl_handle(alpha),
 
 1177                            viennacl::traits::opencl_handle(beta))
 
 1192   template<
typename NumericT>
 
 1213                               output, cl_uint(output.
start()), cl_uint(output.
stride()),
 
 1214                               cl_uint(is_inclusive ? 0 : 1), opencl_carries.opencl_handle())
 
 1226                               opencl_carries.opencl_handle())
 
 1237 template<
typename NumericT>
 
 1250 template<
typename NumericT>
 
void min_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the minimum of a vector, where the result is stored on a CPU scalar. 
cl_uint stride
Increment between integers. 
vcl_size_t const_size() const 
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
void avbv(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
Represents an OpenCL device within ViennaCL. 
void norm_1_cpu(vector_base< T > const &vec, T &result)
Computes the l^1-norm of a vector with final reduction on CPU. 
Generic size and resize functionality for different vector and matrix types. 
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors. 
viennacl::ocl::command_queue & get_queue()
Represents an OpenCL kernel within ViennaCL. 
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
cl_uint start
Starting value of the integer stride. 
static std::string program_name()
size_type local_work_size(int index=0) const 
Returns the local work size at the respective dimension. 
void norm_reduction_impl(vector_base< T > const &vec, vector_base< T > &partial_result, cl_uint norm_id)
Computes the partial work group results for vector norms. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for multiple inner products on/with viennacl::vector<...
void norm_inf_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the supremum-norm of a vector. 
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL. 
Determines row and column increments for matrices and matrix proxies. 
void min_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the minimum of a vector, where the result is stored in an OpenCL buffer. ...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector. 
T max(const T &lhs, const T &rhs)
Maximum. 
An expression template class that represents a binary operation that yields a vector. 
static void init(viennacl::ocl::context &ctx)
void avbv_v(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding. 
Common implementations shared by OpenCL-based operations. 
viennacl::ocl::handle< cl_command_queue > const & handle() const 
size_type stride() const 
Returns the stride within the buffer (in multiples of sizeof(NumericT)) 
#define VIENNACL_ERR_CHECK(err)
const OCL_TYPE & get() const 
void inner_prod_impl(vector_base< T > const &vec1, vector_base< T > const &vec2, vector_base< T > &partial_result)
Computes the partial inner product of two vectors - implementation. Library users should call inner_p...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
void sum_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the sum over all entries of a vector. 
A class representing local (shared) OpenCL memory. Typically used as kernel argument. 
void max_impl(vector_base< NumericT > const &x, scalar< NumericT > &result)
Computes the maximum value of a vector, where the result is stored in an OpenCL buffer. 
Helper struct for checking whether a type is a host scalar type (e.g. float, double) ...
void max_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the maximum value of a vector, where the value is stored in a host value. 
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like) 
void norm_2_cpu(vector_base< T > const &vec, T &result)
Computes the l^2-norm of a vector with final reduction on CPU. 
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
OpenCL kernel file for vector operations. 
Implementation of a smart-pointer-like class for handling OpenCL handles. 
result_of::size_type< T >::type start(T const &obj)
void av(vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign)
static void init(viennacl::ocl::context &ctx)
void norm_1_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^1-norm of a vector. 
void resize(size_type new_size, bool preserve=true)
Resizes the allocated memory for the vector. Pads the memory to be a multiple of 'AlignmentV'. 
Common base class for dense vectors, vector ranges, and vector slices. 
void inclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an inclusive scan using OpenCL. 
Helper metafunction for checking whether the provided type is viennacl::op_div (for division) ...
OpenCL kernel file for scan operations. To be merged back to vector operations. 
void element_op(matrix_base< T > &A, matrix_expression< const matrix_base< T >, const matrix_base< T >, op_element_binary< OP > > const &proxy)
Implementation of binary element-wise operations A = OP(B,C) 
static void init(viennacl::ocl::context &ctx)
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and su...
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void convert(matrix_base< DestNumericT > &dest, matrix_base< SrcNumericT > const &src)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
Representation of an OpenCL kernel in ViennaCL. 
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
void exclusive_scan(vector_base< NumericT > const &input, vector_base< NumericT > &output)
This function implements an exclusive scan using OpenCL. 
cl_uint index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
size_type size() const 
Returns the length of the vector (cf. std::vector) 
void norm_inf_cpu(vector_base< T > const &vec, T &result)
Computes the supremum-norm of a vector. 
size_type global_work_size(int index=0) const 
Returns the global work size at the respective dimension. 
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
VectorType const & const_at(vcl_size_t i) const 
viennacl::ocl::packed_cl_uint make_layout(vector_base< NumericT > const &vec)
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
OpenCL kernel file for element-wise vector operations. 
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Forward declarations of the implicit_vector_base, vector_base class. 
T min(const T &lhs, const T &rhs)
Minimum. 
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
size_type internal_size() const 
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
iterator end()
Returns an iterator pointing to the end of the vector (STL like) 
Helper metafunction for checking whether the provided type is viennacl::op_prod (for products/multipl...
size_type start() const 
Returns the offset within the buffer. 
std::string op_to_string(op_abs)
Helper class for converting a type to its string representation. 
void scan_impl(vector_base< NumericT > const &input, vector_base< NumericT > &output, bool is_inclusive)
Worker routine for scan routines using OpenCL. 
void vector_swap(vector_base< T > &vec1, vector_base< T > &vec2)
Swaps the contents of two vectors, data is copied. 
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
void inner_prod_cpu(vector_base< T > const &vec1, vector_base< T > const &vec2, T &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
Implementation of the ViennaCL scalar class. 
static void init(viennacl::ocl::context &ctx)
void sum_cpu(vector_base< NumericT > const &x, NumericT &result)
Computes the sum over all entries of a vector. 
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Simple enable-if variant that uses the SFINAE pattern. 
cl_uint size
Number of values in the stride. 
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const 
Creates a memory buffer within the context.