58 template<
typename NumericT>
 
   61   if (std::fabs(s1 - s2) > 0)
 
   62     return (s1 - s2) / 
std::max(std::fabs(s1), std::fabs(s2));
 
   66 template<
typename NumericT>
 
   69   std::vector<NumericT> v2_cpu(v2.
size());
 
   73   for (std::size_t i=0;i<v1.size(); ++i)
 
   75     if ( 
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
 
   76       v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / 
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
 
   80     if (v2_cpu[i] > 0.0001)
 
   83       std::cout << 
"Error at entry " << i   << 
": " << v1[i]   << 
" vs. " << v2[i]   << std::endl;
 
   90   for (std::size_t i=0;i<v2_cpu.size(); ++i)
 
   91     inf_norm = std::max<NumericT>(inf_norm, std::fabs(v2_cpu[i]));
 
   96 template<
typename NumericT>
 
  100   for (std::size_t i=0; i<A2.
size1(); ++i)
 
  101     for (std::size_t j=0; j<A2.
size2(); ++j)
 
  109   return diff(host_values, vcl_device_values);
 
  113 template<
typename HostContainerT, 
typename DeviceContainerT, 
typename NumericT>
 
  114 void check(HostContainerT 
const & host_container, DeviceContainerT 
const & device_container,
 
  115            std::string current_stage, 
NumericT epsilon)
 
  117   current_stage.resize(25, 
' ');
 
  118   std::cout << 
"Testing operation: " << current_stage;
 
  119   NumericT rel_error = std::fabs(
diff(host_container, device_container));
 
  121   if (rel_error > epsilon)
 
  123     std::cout << std::endl;
 
  124     std::cout << 
"# Error at operation: " << current_stage << std::endl;
 
  125     std::cout << 
"  diff: " << rel_error << std::endl;
 
  128   std::cout << 
"PASS" << std::endl;
 
  134   template<
typename LHS, 
typename RHS>
 
  135   static void apply(LHS & lhs, RHS 
const & rhs) { lhs = rhs; }
 
  137   static std::string 
str() { 
return "="; }
 
  142   template<
typename LHS, 
typename RHS>
 
  143   static void apply(LHS & lhs, RHS 
const & rhs) { lhs += rhs; }
 
  145   static std::string 
str() { 
return "+="; }
 
  150   template<
typename LHS, 
typename RHS>
 
  151   static void apply(LHS & lhs, RHS 
const & rhs) { lhs -= rhs; }
 
  153   static std::string 
str() { 
return "-="; }
 
  160 template<
typename OpT, 
typename NumericT, 
typename HostMatrixT, 
typename DeviceMatrixT>
 
  162                  HostMatrixT &   host_A,   HostMatrixT &   host_B, HostMatrixT & host_C,
 
  163                DeviceMatrixT & device_A, std::string name_A,
 
  164                DeviceMatrixT & device_B, std::string name_B,
 
  165                DeviceMatrixT & device_C, 
bool copy_from_A,
 
  166                bool trans_first, 
bool trans_second)
 
  170   for (std::size_t i = 0; i<host_A.size(); ++i)
 
  171     for (std::size_t j = 0; j<host_A[i].size(); ++j)
 
  173       host_A[i][j] = randomNumber();
 
  174       host_B[i][j] = randomNumber();
 
  184   for (std::size_t i = 0; i<host_A.size(); ++i)
 
  185     for (std::size_t j = 0; j<host_A[i].size(); ++j)
 
  188       for (std::size_t k = 0; k<host_A[i].size(); ++k)
 
  189         tmp +=   (trans_first ? host_A[k][i] : host_A[i][k])
 
  190                * (trans_second ? host_B[j][k] : host_B[k][j]);
 
  191       OpT::apply(host_C[i][j], tmp);
 
  194   if (trans_first && trans_second)
 
  197     check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
"^T"), epsilon);
 
  199   else if (trans_first && !trans_second)
 
  202     check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
""), epsilon);
 
  204   else if (!trans_first && trans_second)
 
  207     check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
"^T"), epsilon);
 
  212     check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
""), epsilon);
 
  218 template<
typename OpT, 
typename NumericT, 
typename HostMatrixT, 
typename DeviceMatrixT>
 
  220                  HostMatrixT &   host_A,   HostMatrixT &   host_B, HostMatrixT & host_C,
 
  221                DeviceMatrixT & device_A, std::string name_A,
 
  222                DeviceMatrixT & device_B, std::string name_B,
 
  223                DeviceMatrixT & device_C, 
bool copy_from_A)
 
  225   test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, 
false, 
false);
 
  226   test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, 
false, 
true);
 
  227   test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, 
true, 
false);
 
  228   test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, 
true, 
true);
 
  234 template<
typename NumericT>
 
  245   std::vector<NumericT> std_x(N);
 
  246   std::vector<NumericT> std_y(N);
 
  247   std::vector<NumericT> std_z(N);
 
  249   for (std::size_t i=0; i<std_x.size(); ++i)
 
  251   for (std::size_t i=0; i<std_y.size(); ++i)
 
  253   for (std::size_t i=0; i<std_z.size(); ++i)
 
  266   check(std_x, vcl_x, 
"x = x", epsilon);
 
  269   std_x[0] = std_x[2]; std_x[1] = std_x[3];
 
  271   check(std_x, vcl_x, 
"x = x (range)", epsilon);
 
  277   std::vector<std::vector<NumericT> > std_A(N, std::vector<NumericT>(N, 
NumericT(1)));
 
  278   std::vector<std::vector<NumericT> > std_B(N, std::vector<NumericT>(N, 
NumericT(2)));
 
  279   std::vector<std::vector<NumericT> > std_C(N, std::vector<NumericT>(N, 
NumericT(3)));
 
  291   check(std_A, vcl_A, 
"A = A", epsilon);
 
  294   std_A[0][0] = std_A[0][2]; std_A[0][1] = std_A[0][3];
 
  296   check(std_A, vcl_A, 
"A = A (range)", epsilon);
 
  299   for (std::size_t i = 0; i<std_y.size(); ++i)
 
  302     for (std::size_t j = 0; j<std_x.size(); ++j)
 
  303       val += std_A[i][j] * std_x[j];
 
  307   check(std_y, vcl_x, 
"x = A*x", epsilon);
 
  309   typedef unsigned int     KeyType;
 
  310   std::vector< std::map<KeyType, NumericT> > std_Asparse(N);
 
  312   for (std::size_t i=0; i<std_Asparse.size(); ++i)
 
  315       std_Asparse[i][KeyType(i-1)] = randomNumber();
 
  316     std_Asparse[i][KeyType(i)] = 
NumericT(1) + randomNumber();
 
  317     if (i < std_Asparse.size() - 1)
 
  318       std_Asparse[i][KeyType(i+1)] = randomNumber();
 
  334   for (std::size_t i=0; i<std_Asparse.size(); ++i)
 
  337     for (
typename std::map<unsigned int, NumericT>::const_iterator it = std_Asparse[i].begin(); it != std_Asparse[i].end(); ++it)
 
  338       val += it->second * std_x[it->first];
 
  344   check(std_y, vcl_x, 
"x = A*x (sparse, csr)", epsilon);
 
  348   check(std_y, vcl_x, 
"x = A*x (sparse, coo)", epsilon);
 
  352   check(std_y, vcl_x, 
"x = A*x (sparse, ell)", epsilon);
 
  356   check(std_y, vcl_x, 
"x = A*x (sparse, sell)", epsilon);
 
  360   check(std_y, vcl_x, 
"x = A*x (sparse, hyb)", epsilon);
 
  361   std::cout << std::endl;
 
  367   test_gemm<op_assign>(epsilon, std_A, std_B, std_C, vcl_A, 
"A", vcl_B, 
"B", vcl_A, 
true);
 
  368   test_gemm<op_assign>(epsilon, std_B, std_A, std_C, vcl_B, 
"B", vcl_A, 
"A", vcl_A, 
false);
 
  369   test_gemm<op_assign>(epsilon, std_A, std_A, std_C, vcl_A, 
"A", vcl_A, 
"A", vcl_A, 
true);
 
  370   std::cout << std::endl;
 
  372   test_gemm<op_plus_assign>(epsilon, std_A, std_B, std_C, vcl_A, 
"A", vcl_B, 
"B", vcl_A, 
true);
 
  373   test_gemm<op_plus_assign>(epsilon, std_B, std_A, std_C, vcl_B, 
"B", vcl_A, 
"A", vcl_A, 
false);
 
  374   test_gemm<op_plus_assign>(epsilon, std_A, std_A, std_C, vcl_A, 
"A", vcl_A, 
"A", vcl_A, 
true);
 
  375   std::cout << std::endl;
 
  377   test_gemm<op_minus_assign>(epsilon, std_A, std_B, std_C, vcl_A, 
"A", vcl_B, 
"B", vcl_A, 
true);
 
  378   test_gemm<op_minus_assign>(epsilon, std_B, std_A, std_C, vcl_B, 
"B", vcl_A, 
"A", vcl_A, 
false);
 
  379   test_gemm<op_minus_assign>(epsilon, std_A, std_A, std_C, vcl_A, 
"A", vcl_A, 
"A", vcl_A, 
true);
 
  380   std::cout << std::endl;
 
  389   for (std::size_t i = 0; i<std_A.size(); ++i)
 
  390     for (std::size_t j = 0; j<std_A[i].size(); ++j)
 
  393       for (std::size_t k = 0; k<std_A[i].size(); ++k)
 
  394         tmp += std_Asparse[i][KeyType(k)] * std_A[k][j];
 
  400   check(std_C, vcl_A, 
"A = csr*A", epsilon);
 
  404   check(std_C, vcl_A, 
"A = coo*A", epsilon);
 
  408   check(std_C, vcl_A, 
"A = ell*A", epsilon);
 
  416   check(std_C, vcl_A, 
"A = hyb*A", epsilon);
 
  420   for (std::size_t i = 0; i<std_A.size(); ++i)
 
  421     for (std::size_t j = 0; j<std_A[i].size(); ++j)
 
  424       for (std::size_t k = 0; k<std_A[i].size(); ++k)
 
  425         tmp += std_Asparse[i][KeyType(k)] * std_A[j][k];
 
  431   check(std_C, vcl_A, 
"A = csr*A^T", epsilon);
 
  435   check(std_C, vcl_A, 
"A = coo*A^T", epsilon);
 
  439   check(std_C, vcl_A, 
"A = ell*A^T", epsilon);
 
  447   check(std_C, vcl_A, 
"A = hyb*A^T", epsilon);
 
  458   std::cout << std::endl;
 
  459   std::cout << 
"----------------------------------------------" << std::endl;
 
  460   std::cout << 
"----------------------------------------------" << std::endl;
 
  461   std::cout << 
"## Test :: Self-Assignment" << std::endl;
 
  462   std::cout << 
"----------------------------------------------" << std::endl;
 
  463   std::cout << 
"----------------------------------------------" << std::endl;
 
  464   std::cout << std::endl;
 
  466   int retval = EXIT_SUCCESS;
 
  468   std::cout << std::endl;
 
  469   std::cout << 
"----------------------------------------------" << std::endl;
 
  470   std::cout << std::endl;
 
  473     NumericT epsilon = 
static_cast<NumericT
>(1E-4);
 
  474     std::cout << 
"# Testing setup:" << std::endl;
 
  475     std::cout << 
"  eps:     " << epsilon << std::endl;
 
  476     std::cout << 
"  numeric: float" << std::endl;
 
  477     retval = test<NumericT>(epsilon);
 
  478     if ( retval == EXIT_SUCCESS )
 
  479         std::cout << 
"# Test passed" << std::endl;
 
  483   std::cout << std::endl;
 
  484   std::cout << 
"----------------------------------------------" << std::endl;
 
  485   std::cout << std::endl;
 
  489   std::cout << std::endl;
 
  490   std::cout << 
"------- Test completed --------" << std::endl;
 
  491   std::cout << std::endl;
 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
void test_gemm(NumericT epsilon, HostMatrixT &host_A, HostMatrixT &host_B, HostMatrixT &host_C, DeviceMatrixT &device_A, std::string name_A, DeviceMatrixT &device_B, std::string name_B, DeviceMatrixT &device_C, bool copy_from_A, bool trans_first, bool trans_second)
A reader and writer for the matrix market format is implemented here. 
NumericT diff(NumericT const &s1, viennacl::scalar< NumericT > const &s2)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
size_type internal_size() const 
Returns the total amount of allocated memory in multiples of sizeof(NumericT) 
std::vector< std::vector< NumericT > > trans(std::vector< std::vector< NumericT > > const &A)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class. 
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum. 
Implementation of the coordinate_matrix class. 
static void apply(LHS &lhs, RHS const &rhs)
viennacl::vector< float > v1
Implementation of the hyb_matrix class. 
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Sparse matrix class using the ELLPACK format for storing the nonzeros. 
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like) 
Implementations of incomplete factorization preconditioners. Convenience header file. 
Sparse matrix class using the sliced ELLPACK with parameters C, . 
Implementation of the compressed_matrix class. 
Implementation of the sliced_ell_matrix class. 
int test(NumericT epsilon)
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
size_type size2() const 
Returns the number of columns. 
Implementation of the ell_matrix class. 
size_type size1() const 
Returns the number of rows. 
Proxy classes for vectors. 
Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number o...
Proxy classes for matrices. 
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void apply(LHS &lhs, RHS const &rhs)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators. 
size_type size() const 
Returns the length of the vector (cf. std::vector) 
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
static void apply(LHS &lhs, RHS const &rhs)
size_type internal_size2() const 
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
iterator end()
Returns an iterator pointing to the end of the vector (STL like) 
void check(HostContainerT const &host_container, DeviceContainerT const &device_container, std::string current_stage, NumericT epsilon)
Common routines used within ILU-type preconditioners. 
Implementation of the ViennaCL scalar class. 
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)