32 template<
typename ScalarType>
 
   35   if (s1 > s2 || s1 < s2)
 
   36     return (s1 - s2) / 
std::max(std::fabs(s1), std::fabs(s2));
 
   40 template<
typename ScalarType, 
typename ViennaCLVectorType>
 
   41 ScalarType diff(std::vector<ScalarType> 
const & 
v1, ViennaCLVectorType 
const & vcl_vec)
 
   43    std::vector<ScalarType> v2_cpu(vcl_vec.size());
 
   48    for (
unsigned int i=0;i<v1.size(); ++i)
 
   50       if ( 
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
 
   51          v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / 
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
 
   55       if (v2_cpu[i] > inf_norm)
 
   62 template<
typename T, 
typename U, 
typename EpsilonT>
 
   63 void check(T 
const & t, U 
const & u, EpsilonT eps)
 
   65   EpsilonT rel_error = std::fabs(static_cast<EpsilonT>(
diff(t,u)));
 
   68     std::cerr << 
"Relative error: " << rel_error << std::endl;
 
   69     std::cerr << 
"Aborting!" << std::endl;
 
   72   std::cout << 
"SUCCESS ";
 
   77   std::size_t 
size1  = 13; 
 
   78   std::size_t 
size2  = 11; 
 
   79   float  eps_float  = 1e-5f;
 
   80   double eps_double = 1e-12;
 
   85   std::vector<float> ref_float_x(size1); 
for (std::size_t i=0; i<size1; ++i) ref_float_x[i] = static_cast<float>(i);
 
   86   std::vector<float> ref_float_y(size2); 
for (std::size_t i=0; i<size2; ++i) ref_float_y[i] = static_cast<float>(size2 - i);
 
   87   std::vector<float> ref_float_A(size1*size2); 
for (std::size_t i=0; i<size1*size2; ++i) ref_float_A[i] = static_cast<float>(3*i);
 
   88   std::vector<float> ref_float_B(size1*size2); 
for (std::size_t i=0; i<size1*size2; ++i) ref_float_B[i] = static_cast<float>(2*i);
 
   90   std::vector<double> ref_double_x(size1, 1.0); 
for (std::size_t i=0; i<size1; ++i) ref_double_x[i] = static_cast<double>(i);
 
   91   std::vector<double> ref_double_y(size2, 2.0); 
for (std::size_t i=0; i<size2; ++i) ref_double_y[i] = static_cast<double>(size2 - i);
 
   92   std::vector<double> ref_double_A(size1*size2, 3.0); 
for (std::size_t i=0; i<size1*size2; ++i) ref_double_A[i] = static_cast<double>(3*i);
 
   93   std::vector<double> ref_double_B(size1*size2, 4.0); 
for (std::size_t i=0; i<size1*size2; ++i) ref_double_B[i] = static_cast<double>(2*i);
 
  107 #ifdef VIENNACL_WITH_CUDA 
  120 #ifdef VIENNACL_WITH_OPENCL 
  143   check(ref_float_x, host_float_x, eps_float);
 
  144   check(ref_float_y, host_float_y, eps_float);
 
  145   check(ref_float_A, host_float_A, eps_float);
 
  146   check(ref_float_B, host_float_B, eps_float);
 
  147   check(ref_double_x, host_double_x, eps_double);
 
  148   check(ref_double_y, host_double_y, eps_double);
 
  149   check(ref_double_A, host_double_A, eps_double);
 
  150   check(ref_double_B, host_double_B, eps_double);
 
  151 #ifdef VIENNACL_WITH_CUDA 
  152   check(ref_float_x, cuda_float_x, eps_float);
 
  153   check(ref_float_y, cuda_float_y, eps_float);
 
  154   check(ref_float_A, cuda_float_A, eps_float);
 
  155   check(ref_float_B, cuda_float_B, eps_float);
 
  156   check(ref_double_x, cuda_double_x, eps_double);
 
  157   check(ref_double_y, cuda_double_y, eps_double);
 
  158   check(ref_double_A, cuda_double_A, eps_double);
 
  159   check(ref_double_B, cuda_double_B, eps_double);
 
  161 #ifdef VIENNACL_WITH_OPENCL 
  162   check(ref_float_x, opencl_float_x, eps_float);
 
  163   check(ref_float_y, opencl_float_y, eps_float);
 
  164   check(ref_float_A, opencl_float_A, eps_float);
 
  165   check(ref_float_B, opencl_float_B, eps_float);
 
  168     check(ref_double_x, *opencl_double_x, eps_double);
 
  169     check(ref_double_y, *opencl_double_y, eps_double);
 
  170     check(ref_double_A, *opencl_double_A, eps_double);
 
  171     check(ref_double_B, *opencl_double_B, eps_double);
 
  176   std::cout << std::endl << 
"-- Testing xGEMV...";
 
  177   for (std::size_t i=0; i<size1/3; ++i)
 
  179     ref_float_x[i * 2 + 1] *= 0.1234f;
 
  180     ref_double_x[i * 2 + 1] *= 0.1234;
 
  181     for (std::size_t j=0; j<size2/4; ++j)
 
  183       ref_float_x[i * 2 + 1]  += 3.1415f * ref_float_A[(2*i+2) * size2 + 3 * j + 1] * ref_float_y[j * 3 + 1];
 
  184       ref_double_x[i * 2 + 1] += 3.1415  * ref_double_A[(2*i+2) * size2 + 3 * j + 1] * ref_double_y[j * 3 + 1];
 
  188   std::cout << std::endl << 
"Host: ";
 
  191                     ViennaCLInt(size1/3), 
ViennaCLInt(size2/4), 3.1415f, viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_A), 2, 1, 2, 3, 
ViennaCLInt(size2),
 
  192                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_y), 1, 3,
 
  194                     viennacl::linalg::host_based::detail::extract_raw_pointer<float>(host_float_x), 1, 2);
 
  195   check(ref_float_x, host_float_x, eps_float);
 
  198                     ViennaCLInt(size1/3), 
ViennaCLInt(size2/4), 3.1415, viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_A), 2, 1, 2, 3, 
ViennaCLInt(size2),
 
  199                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_y), 1, 3,
 
  201                     viennacl::linalg::host_based::detail::extract_raw_pointer<double>(host_double_x), 1, 2);
 
  202   check(ref_double_x, host_double_x, eps_double);
 
  205 #ifdef VIENNACL_WITH_CUDA 
  206   std::cout << std::endl << 
"CUDA: ";
 
  213   check(ref_float_x, cuda_float_x, eps_float);
 
  220   check(ref_double_x, cuda_double_x, eps_double);
 
  223 #ifdef VIENNACL_WITH_OPENCL 
  224   std::cout << std::endl << 
"OpenCL: ";
 
  228                       viennacl::traits::opencl_handle(opencl_float_y), 1, 3,
 
  230                       viennacl::traits::opencl_handle(opencl_float_x), 1, 2);
 
  231   check(ref_float_x, opencl_float_x, eps_float);
 
  237                         viennacl::traits::opencl_handle(*opencl_double_y), 1, 3,
 
  239                         viennacl::traits::opencl_handle(*opencl_double_x), 1, 2);
 
  240     check(ref_double_x, *opencl_double_x, eps_double);
 
  246 #ifdef VIENNACL_WITH_OPENCL 
  247   delete opencl_double_x;
 
  248   delete opencl_double_y;
 
  249   delete opencl_double_A;
 
  250   delete opencl_double_B;
 
  258   std::cout << std::endl << 
"!!!! TEST COMPLETED SUCCESSFULLY !!!!" << std::endl;
 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendCreate(ViennaCLBackend *backend)
Generic backend for CUDA, OpenCL, host-based stuff. 
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, double beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendSetOpenCLContextID(ViennaCLBackend backend, ViennaCLInt context_id)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum. 
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context. 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, float *x, ViennaCLInt offx, ViennaCLInt incx, float beta, float *y, ViennaCLInt offy, ViennaCLInt incy)
viennacl::vector< float > v1
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLHostDgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy)
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLBackendDestroy(ViennaCLBackend *backend)
bool double_support() const 
ViennaCL convenience function: Returns true if the device supports double precision. 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, double *x, ViennaCLInt offx, ViennaCLInt incx, double beta, double *y, ViennaCLInt offy, ViennaCLInt incy)
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLOpenCLSgemv(ViennaCLBackend backend, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLInt m, ViennaCLInt n, float alpha, cl_mem A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda, cl_mem x, ViennaCLInt offx, ViennaCLInt incx, float beta, cl_mem y, ViennaCLInt offy, ViennaCLInt incy)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
NumericT * cuda_arg(scalar< NumericT > &obj)
Convenience helper function for extracting the CUDA handle from a ViennaCL scalar. Non-const version. 
void check(T const &t, U const &u, EpsilonT eps)
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context. 
ScalarType diff(ScalarType const &s1, ScalarType const &s2)