Go to the source code of this file.
      
        
#define BENCHMARK_OP(OPERATION, NAME, PERF, INDEX)
      
 
Value:
  timer.start(); \
  Nruns = 0; \
  time_spent = 0; \
  while (time_spent < time_per_benchmark) \
  { \
    time_previous = timer.get(); \
    time_spent += timer.get() - time_previous; \
    Nruns+=1; \
  } \
  time_spent/=(double)Nruns; \
  std::cout << prefix << NAME " : " << PERF << " " INDEX << std::endl; \
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
 
 
 
template<class T > 
      
        
void bench(size_t BLAS1_N,
           size_t BLAS2_M,
           size_t BLAS2_N,
           size_t BLAS3_M,
           size_t BLAS3_N,
           size_t BLAS3_K,
           std::string const & prefix)
      
 
 
template<class T , class F >