29 #ifndef VIENNACL_WITH_OPENCL 
   30   #define VIENNACL_WITH_OPENCL 
   47 #define BENCHMARK_VECTOR_SIZE   100000 
   50 template<
typename ScalarType>
 
   64   exec_time = timer.
get();
 
   65   std::cout << 
"Time for building scalar kernels: " << exec_time << std::endl;
 
   69   exec_time = timer.
get();
 
   71   std::cout << 
"Time for building vector kernels: " << exec_time << std::endl;
 
   75   exec_time = timer.
get();
 
   76   std::cout << 
"Time for building matrix kernels: " << exec_time << std::endl;
 
   80   exec_time = timer.
get();
 
   81   std::cout << 
"Time for building compressed_matrix kernels: " << exec_time << std::endl;
 
   89     std_vec1[i] = std_vec1[i-1] * 
ScalarType(1.000001);
 
   93   double std_accumulate = 0;
 
   94   double vcl_accumulate = 0;
 
   98     std_accumulate += std_vec1[i];
 
   99   exec_time = timer.
get();
 
  100   std::cout << 
"Time for " << BENCHMARK_VECTOR_SIZE << 
" entry accesses on host: " << exec_time << std::endl;
 
  101   std::cout << 
"Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
 
  102   std::cout << 
"Result of operation on host: " << std_accumulate << std::endl;
 
  104   vcl_accumulate = vcl_vec1[0];
 
  109     vcl_accumulate += vcl_vec1[i];
 
  110   exec_time = timer.
get();
 
  111   std::cout << 
"Time for " << BENCHMARK_VECTOR_SIZE << 
" entry accesses via OpenCL: " << exec_time << std::endl;
 
  112   std::cout << 
"Time per entry: " << exec_time / BENCHMARK_VECTOR_SIZE << std::endl;
 
  113   std::cout << 
"Result of operation via OpenCL: " << vcl_accumulate << std::endl;
 
  120   std::cout << std::endl;
 
  121   std::cout << 
"----------------------------------------------" << std::endl;
 
  122   std::cout << 
"               Device Info" << std::endl;
 
  123   std::cout << 
"----------------------------------------------" << std::endl;
 
  127   std::cout << std::endl;
 
  128   std::cout << 
"----------------------------------------------" << std::endl;
 
  129   std::cout << 
"----------------------------------------------" << std::endl;
 
  130   std::cout << 
"## Benchmark :: OpenCL performance" << std::endl;
 
  131   std::cout << 
"----------------------------------------------" << std::endl;
 
  132   std::cout << std::endl;
 
  133   std::cout << 
"   -------------------------------" << std::endl;
 
  134   std::cout << 
"   # benchmarking single-precision" << std::endl;
 
  135   std::cout << 
"   -------------------------------" << std::endl;
 
  136   run_benchmark<float>();
 
  139     std::cout << std::endl;
 
  140     std::cout << 
"   -------------------------------" << std::endl;
 
  141     std::cout << 
"   # benchmarking double-precision" << std::endl;
 
  142     std::cout << 
"   -------------------------------" << std::endl;
 
  143     run_benchmark<double>();
 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type like float or double.
void finish() const 
Waits until all kernels in the queue have finished their execution. 
Implementation of the dense matrix class. 
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context. 
std::string info(vcl_size_t indent=0, char indent_char= ' ') const 
Returns an info string with a few properties of the device. Use full_info() to get all details.
viennacl::ocl::command_queue & get_queue()
Convenience function for getting the default queue for the currently active device in the active context.
Implementation of the compressed_matrix class. 
bool double_support() const 
ViennaCL convenience function: Returns true if the device supports double precision. 
A simple, yet (mostly) sufficiently accurate timer for benchmarking and profiling. 
#define BENCHMARK_VECTOR_SIZE
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations such as norms and inner products are located in linalg/vector_operations.hpp.
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU).
A sparse square matrix in compressed sparse rows format. 
Implementation of the ViennaCL scalar class.