This tutorial shows how to use the BLAS level 3 functionality of ViennaCL.
We begin by defining preprocessor constants and including the necessary headers.
#ifndef NDEBUG
 #define NDEBUG
#endif
#include <iostream>
#include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/matrix_proxy.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/io.hpp>
#define VIENNACL_WITH_UBLAS 1
#define BLAS3_MATRIX_SIZE   400
using namespace boost::numeric;
  Later in this tutorial we will iterate over all available OpenCL devices. To ensure that the tutorial also works when no OpenCL backend is enabled, we define the following dummy struct, which stands in for the device list and reports exactly one device.
#ifndef VIENNACL_WITH_OPENCL
  /**
   * Placeholder for the device list, compiled only when
   * VIENNACL_WITH_OPENCL is not defined.
   *
   * It exposes just the size() member that the per-device loop queries,
   * and always reports one entry so that the loop body runs exactly once.
   */
  struct dummy
  {
    /** @return Always 1. */
    std::size_t size() const
    {
      return 1;
    }
  };
#endif
 We don't need additional auxiliary routines, so let us start straight away with main():
#ifndef NDEBUG
 #define NDEBUG
#endif
#include <iostream>
#include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/matrix_proxy.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/io.hpp>
#define VIENNACL_WITH_UBLAS 1
#define BLAS3_MATRIX_SIZE   400
using namespace boost::numeric;
#ifndef VIENNACL_WITH_OPENCL
  /**
   * Stand-in used when VIENNACL_WITH_OPENCL is not defined.
   *
   * Provides only a size() member, which always yields 1, so that a loop
   * of the form `for (id = 0; id < devices.size(); ++id)` executes once.
   */
  struct dummy
  {
    /** @return The constant 1. */
    std::size_t size() const
    {
      return 1;
    }
  };
#endif
{
  // Execution time reported in the messages below. Zero-initialized so that
  // it never holds an indeterminate value when printed; the code that
  // measures and assigns it is not part of this excerpt.
  double exec_time = 0.0;

  // Fill both input matrices entry by entry with values from randomNumber().
  for (unsigned int i = 0; i < ublas_A.size1(); ++i)
    for (unsigned int j = 0; j < ublas_A.size2(); ++j)
      ublas_A(i,j) = randomNumber();
  for (unsigned int i = 0; i < ublas_B.size1(); ++i)
    for (unsigned int j = 0; j < ublas_B.size2(); ++j)
      ublas_B(i,j) = randomNumber();
  
  // Reference run: presumably fills ublas_C via uBLAS -- the computation
  // itself is not visible in this excerpt.
  std::cout << "--- Computing matrix-matrix product using ublas ---" << std::endl;
  std::cout << " - Execution time: " << exec_time << std::endl;
  std::cout << std::endl << "--- Computing matrix-matrix product on each available compute device using ViennaCL ---" << std::endl;
#ifdef VIENNACL_WITH_OPENCL
#else
  // Without the OpenCL backend there is no real device list; the dummy
  // reports a size of one, so the loop below runs exactly once.
  dummy devices;
#endif
  for (std::size_t device_id=0; device_id<devices.size(); ++device_id)
  {
#ifdef VIENNACL_WITH_OPENCL
#endif
    std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl;

    // Compare ublas_C1 against the reference ublas_C entry by entry and stop
    // at the first mismatch.
    // NOTE(review): the loop bounds come from ublas_A, which matches the
    // shape of the result only for square matrices -- confirm.
    std::cout << " - Checking result... ";
    bool check_ok = true;
    for (std::size_t i = 0; i < ublas_A.size1(); ++i)
    {
      for (std::size_t j = 0; j < ublas_A.size2(); ++j)
      {
        // Relative error test written as |diff| > tol * |ref|. Dividing by the
        // signed reference value would produce a negative ratio for negative
        // entries (so errors would go undetected) and a division by zero for
        // zero entries.
        if ( std::fabs(ublas_C1(i,j) - ublas_C(i,j)) > 1e-4 * std::fabs(ublas_C(i,j)) )
        {
          check_ok = false;
          break;
        }
      }
      if (!check_ok)
        break;
    }
    if (check_ok)
      std::cout << "[OK]" << std::endl << std::endl;
    else
      std::cout << "[FAILED]" << std::endl << std::endl;
  }
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
  return EXIT_SUCCESS;
}