29 #define VIENNACL_WITH_UBLAS 1 
   31 #include <boost/numeric/ublas/triangular.hpp> 
   32 #include <boost/numeric/ublas/vector.hpp> 
   33 #include <boost/numeric/ublas/vector_proxy.hpp> 
   34 #include <boost/numeric/ublas/matrix_sparse.hpp> 
   35 #include <boost/numeric/ublas/operation_sparse.hpp> 
   36 #include <boost/numeric/ublas/lu.hpp> 
   57 #define BENCHMARK_RUNS          10 
   60 inline void printOps(
double num_ops, 
double exec_time)
 
   62   std::cout << 
"GFLOPs: " << num_ops / (1000000 * exec_time * 1000) << std::endl;
 
   66 template<
typename ScalarType>
 
   77   boost::numeric::ublas::vector<ScalarType> ublas_vec1;
 
   78   boost::numeric::ublas::vector<ScalarType> ublas_vec2;
 
   80   boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix;
 
   83     std::cout << 
"Error reading Matrix file" << std::endl;
 
   87   std::cout << 
"done reading matrix" << std::endl;
 
   89   ublas_vec1 = boost::numeric::ublas::scalar_vector<ScalarType>(ublas_matrix.size1(), 
ScalarType(1.0));
 
   90   ublas_vec2 = ublas_vec1;
 
  107   #ifndef VIENNACL_EXPERIMENTAL_DOUBLE_PRECISION_WITH_STREAM_SDK_ON_GPU 
  121   std::cout << 
"------- Matrix-Vector product on CPU ----------" << std::endl;
 
  126     boost::numeric::ublas::axpy_prod(ublas_matrix, ublas_vec2, ublas_vec1, 
true);
 
  128   exec_time = timer.
get();
 
  129   std::cout << 
"CPU time: " << exec_time << std::endl;
 
  130   std::cout << 
"CPU "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  131   std::cout << ublas_vec1[0] << std::endl;
 
  134   std::cout << 
"------- Matrix-Vector product with compressed_matrix ----------" << std::endl;
 
  149   exec_time = timer.
get();
 
  150   std::cout << 
"GPU time align1: " << exec_time << std::endl;
 
  151   std::cout << 
"GPU align1 "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  152   std::cout << vcl_vec1[0] << std::endl;
 
  154   std::cout << 
"Testing triangular solves: compressed_matrix" << std::endl;
 
  159   std::cout << 
"ublas..." << std::endl;
 
  162   std::cout << 
"Time elapsed: " << timer.
get() << std::endl;
 
  163   std::cout << 
"ViennaCL..." << std::endl;
 
  168   std::cout << 
"Time elapsed: " << timer.
get() << std::endl;
 
  179   exec_time = timer.
get();
 
  180   std::cout << 
"GPU time align4: " << exec_time << std::endl;
 
  181   std::cout << 
"GPU align4 "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  182   std::cout << vcl_vec1[0] << std::endl;
 
  191   exec_time = timer.
get();
 
  192   std::cout << 
"GPU time align8: " << exec_time << std::endl;
 
  193   std::cout << 
"GPU align8 "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  194   std::cout << vcl_vec1[0] << std::endl;
 
  197   std::cout << 
"------- Matrix-Vector product with coordinate_matrix ----------" << std::endl;
 
  203   for (std::size_t i=0; i<ublas_vec1.size(); ++i)
 
  205     if ( fabs(ublas_vec1[i] - ublas_vec2[i]) / 
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
 
  207       std::cout << 
"Error at index " << i << 
": Should: " << ublas_vec1[i] << 
", Is: " << ublas_vec2[i] << std::endl;
 
  221   exec_time = timer.
get();
 
  222   std::cout << 
"GPU time: " << exec_time << std::endl;
 
  223   std::cout << 
"GPU "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  224   std::cout << vcl_vec1[0] << std::endl;
 
  227   std::cout << 
"------- Matrix-Vector product with ell_matrix ----------" << std::endl;
 
  233   for (std::size_t i=0; i<ublas_vec1.size(); ++i)
 
  235     if ( fabs(ublas_vec1[i] - ublas_vec2[i]) / 
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
 
  237       std::cout << 
"Error at index " << i << 
": Should: " << ublas_vec1[i] << 
", Is: " << ublas_vec2[i] << std::endl;
 
  251   exec_time = timer.
get();
 
  252   std::cout << 
"GPU time: " << exec_time << std::endl;
 
  253   std::cout << 
"GPU "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  254   std::cout << vcl_vec1[0] << std::endl;
 
  257   std::cout << 
"------- Matrix-Vector product with hyb_matrix ----------" << std::endl;
 
  263   for (std::size_t i=0; i<ublas_vec1.size(); ++i)
 
  265     if ( fabs(ublas_vec1[i] - ublas_vec2[i]) / 
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
 
  267       std::cout << 
"Error at index " << i << 
": Should: " << ublas_vec1[i] << 
", Is: " << ublas_vec2[i] << std::endl;
 
  281   exec_time = timer.
get();
 
  282   std::cout << 
"GPU time: " << exec_time << std::endl;
 
  283   std::cout << 
"GPU "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  284   std::cout << vcl_vec1[0] << std::endl;
 
  287   std::cout << 
"------- Matrix-Vector product with sliced_ell_matrix ----------" << std::endl;
 
  293   for (std::size_t i=0; i<ublas_vec1.size(); ++i)
 
  295     if ( fabs(ublas_vec1[i] - ublas_vec2[i]) / 
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
 
  297       std::cout << 
"Error at index " << i << 
": Should: " << ublas_vec1[i] << 
", Is: " << ublas_vec2[i] << std::endl;
 
  311   exec_time = timer.
get();
 
  312   std::cout << 
"GPU time: " << exec_time << std::endl;
 
  313   std::cout << 
"GPU "; 
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) / 
static_cast<double>(
BENCHMARK_RUNS));
 
  314   std::cout << vcl_vec1[0] << std::endl;
 
  322   std::cout << std::endl;
 
  323   std::cout << 
"----------------------------------------------" << std::endl;
 
  324   std::cout << 
"               Device Info" << std::endl;
 
  325   std::cout << 
"----------------------------------------------" << std::endl;
 
  327 #ifdef VIENNACL_WITH_OPENCL 
  330   std::cout << std::endl;
 
  331   std::cout << 
"----------------------------------------------" << std::endl;
 
  332   std::cout << 
"----------------------------------------------" << std::endl;
 
  333   std::cout << 
"## Benchmark :: Sparse" << std::endl;
 
  334   std::cout << 
"----------------------------------------------" << std::endl;
 
  335   std::cout << std::endl;
 
  336   std::cout << 
"   -------------------------------" << std::endl;
 
  337   std::cout << 
"   # benchmarking single-precision" << std::endl;
 
  338   std::cout << 
"   -------------------------------" << std::endl;
 
  339   run_benchmark<float>();
 
  340 #ifdef VIENNACL_WITH_OPENCL 
  344     std::cout << std::endl;
 
  345     std::cout << 
"   -------------------------------" << std::endl;
 
  346     std::cout << 
"   # benchmarking double-precision" << std::endl;
 
  347     std::cout << 
"   -------------------------------" << std::endl;
 
  348     run_benchmark<double>();
 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
void inplace_solve(const matrix_base< NumericT > &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
A reader and writer for the matrix market format is implemented here. 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
std::vector< std::vector< NumericT > > trans(std::vector< std::vector< NumericT > > const &A)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
T max(const T &lhs, const T &rhs)
Maximum. 
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context. 
Implementation of the coordinate_matrix class. 
std::string info(vcl_size_t indent=0, char indent_char= ' ') const 
Returns an info string with a few properties of the device. Use full_info() to get all details...
Implementation of the hyb_matrix class. 
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Sparse matrix class using the ELLPACK format for storing the nonzeros. 
void printOps(double num_ops, double exec_time)
Implementations of incomplete factorization preconditioners. Convenience header file. 
Sparse matrix class using the sliced ELLPACK with parameters C, . 
Implementation of the compressed_matrix class. 
Implementation of the sliced_ell_matrix class. 
bool double_support() const 
ViennaCL convenience function: Returns true if the device supports double precision. 
Implementation of the ell_matrix class. 
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. 
void prod(std::vector< std::map< IndexT, NumericT > > const &stl_A, std::vector< std::map< IndexT, NumericT > > const &stl_B, std::vector< std::map< IndexT, NumericT > > &stl_C)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A tag class representing a lower triangular matrix with unit diagonal. 
A sparse square matrix in compressed sparse rows format. 
long read_matrix_market_file(MatrixT &mat, const char *file, long index_base=1)
Reads a sparse matrix from a file (MatrixMarket format) 
Implementation of the ViennaCL scalar class. 
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...