38 #include <boost/numeric/ublas/io.hpp> 
   39 #include <boost/numeric/ublas/triangular.hpp> 
   40 #include <boost/numeric/ublas/matrix_sparse.hpp> 
   41 #include <boost/numeric/ublas/matrix.hpp> 
   42 #include <boost/numeric/ublas/matrix_proxy.hpp> 
   43 #include <boost/numeric/ublas/lu.hpp> 
   44 #include <boost/numeric/ublas/io.hpp> 
   48 #define VIENNACL_WITH_UBLAS 1 
   61 #define BLAS3_MATRIX_SIZE   1500 
   63 using namespace boost::numeric;
 
   87   std::vector<ScalarType> stl_B(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE);
 
   88   std::vector<ScalarType> stl_C(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE);
 
   93   for (
unsigned int i = 0; i < ublas_A.size1(); ++i)
 
   94     for (
unsigned int j = 0; j < ublas_A.size2(); ++j)
 
   96       ublas_A(i,j) = randomNumber();
 
   97       stl_A[i*ublas_A.size2() + j] = ublas_A(i,j);
 
  100   for (
unsigned int i = 0; i < ublas_B.size1(); ++i)
 
  101     for (
unsigned int j = 0; j < ublas_B.size2(); ++j)
 
  103       ublas_B(i,j) = randomNumber();
 
  104       stl_B[i + j*ublas_B.size1()] = ublas_B(i,j);
 
  109   ublas::matrix_range< ublas::matrix<ScalarType> >  ublas_A_sub(ublas_A, ublas_r1, ublas_r2);
 
  110   ublas::matrix_range< ublas::matrix<ScalarType, ublas::column_major> >  ublas_B_sub(ublas_B, ublas_r2, ublas_r1);
 
  111   ublas::matrix_range< ublas::matrix<ScalarType> >  ublas_C_sub(ublas_C, ublas_r1, ublas_r1);
 
  137   std::cout << 
"--- Computing matrix-matrix product using ublas ---" << std::endl;
 
  139   ublas_C_sub = 
ublas::prod(ublas_A_sub, ublas_B_sub);
 
  140   exec_time = timer.
get();
 
  141   std::cout << 
" - Execution time: " << exec_time << std::endl;
 
  148   std::cout << std::endl << 
"--- Computing matrix-matrix product on each available compute device using ViennaCL ---" << std::endl;
 
  150   for (std::size_t i=0; i<devices.size(); ++i)
 
  158                         &(stl_A[0]) + stl_A.size(),
 
  161                         &(stl_B[0]) + stl_B.size(),
 
  168     exec_time = timer.
get();
 
  169     std::cout << 
" - Execution time on device (no setup time included): " << exec_time << std::endl;
 
  170     std::cout << 
" - GFLOPs: " << (vcl_A.
size1() / 1000.0) * (vcl_A.
size2() / 1000.0) * (vcl_B.
size2() / 1000.0) / exec_time << std::endl;
 
  179     for (
unsigned int i = 0; i < ublas_C1.size1(); ++i)
 
  180       for (
unsigned int j = 0; j < ublas_C1.size2(); ++j)
 
  181         ublas_C1(i,j) = stl_C[i * ublas_C1.size2() + j];
 
  183     std::cout << 
" - Checking result... ";
 
  184     bool check_ok = 
true;
 
  185     for (
unsigned int i = 0; i < ublas_A.size1(); ++i)
 
  187       for (
unsigned int j = 0; j < ublas_A.size2(); ++j)
 
  189         if ( fabs(ublas_C1(i,j) - ublas_C(i,j)) / ublas_C(i,j) > 1e-4 )
 
  199       std::cout << 
"[OK]" << std::endl << std::endl;
 
  201       std::cout << 
"[FAILED]" << std::endl << std::endl;
 
  208   std::cout << 
"!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
 
void finish() const 
Waits until all kernels in the queue have finished their execution. 
void switch_device(vcl_size_t i)
Switches the current device to the i-th device in this context. 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class. 
viennacl::ocl::context & current_context()
Convenience function for returning the current context. 
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context. 
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
viennacl::ocl::command_queue & get_queue()
Convenience function for getting the default queue for the currently active device in the active context.
size_type size2() const
Returns the number of columns. 
std::string name() const 
Device name string. 
size_type size1() const
Returns the number of rows. 
#define BLAS3_MATRIX_SIZE
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. 
Proxy classes for matrices. 
void prod(std::vector< std::map< IndexT, NumericT > > const &stl_A, std::vector< std::map< IndexT, NumericT > > const &stl_B, std::vector< std::map< IndexT, NumericT > > &stl_C)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators. 
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
Class for representing non-strided submatrices of a bigger matrix A. 
std::vector< viennacl::ocl::device > const & devices() const 
Returns a vector with all devices in this context. 
Implementation of the ViennaCL scalar class. 
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)