47 template<
typename ScalarType>
 
   56 template<
typename ScalarType>
 
   65 template<
typename ScalarType>
 
   74 template<
typename ScalarType, 
typename VCLVectorType>
 
   77    std::vector<ScalarType> v2_cpu(v2.size());
 
   81    for (
unsigned int i=0;i<v1.size(); ++i)
 
   83       if (v2_cpu[i] != v1[i])
 
   91 template<
typename T1, 
typename T2>
 
   92 int check(T1 
const & t1, T2 
const & t2)
 
   94   int retval = EXIT_SUCCESS;
 
   96   if (
diff(t1, t2) != 0)
 
   98     std::cout << 
"# Error! Difference: " << 
diff(t1, t2) << std::endl;
 
   99     retval = EXIT_FAILURE;
 
  108 template< 
typename NumericT, 
typename STLVectorType, 
typename ViennaCLVectorType1, 
typename ViennaCLVectorType2 >
 
  109 int test(STLVectorType       & std_v1, STLVectorType       & std_v2,
 
  110          ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
 
  112   int retval = EXIT_SUCCESS;
 
  120   std::cout << 
"Checking for zero_vector initializer..." << std::endl;
 
  121   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  124   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  127   std::cout << 
"Checking for scalar_vector initializer..." << std::endl;
 
  128   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  129     std_v1[i] = cpu_result;
 
  131   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  134   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  135     std_v1[i] = cpu_result + 1;
 
  137   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  140   std::cout << 
"Checking for unit_vector initializer..." << std::endl;
 
  141   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  142     std_v1[i] = (i == 5) ? 1 : 0;
 
  144   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  147   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  156   std::cout << 
"Checking for successful copy..." << std::endl;
 
  157   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  159   if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
 
  167   std::cout << 
"Testing inner_prod..." << std::endl;
 
  169   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  170     cpu_result += std_v1[i] * std_v2[i];
 
  174   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  176   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  180   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  181     cpu_result += (std_v1[i] + std_v2[i]) * (2*std_v2[i]);
 
  185   if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
 
  187   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  191   std::cout << 
"Testing norm_1..." << std::endl;
 
  193   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  194     cpu_result += std_v1[i];
 
  197   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  201   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  202     cpu_result2 += std_v1[i];
 
  205   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  209   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  210     cpu_result2 += std_v1[i] + std_v2[i];
 
  213   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  217   std::cout << 
"Testing norm_inf..." << std::endl;
 
  219   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  220     if (std_v1[i] > cpu_result)
 
  221       cpu_result = std_v1[i];
 
  224   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  228   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  229     if (std_v1[i] > cpu_result2)
 
  230       cpu_result2 = std_v1[i];
 
  233   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  237   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  238     if (std_v1[i] + std_v2[i] > cpu_result2)
 
  239       cpu_result2 = std_v1[i] + std_v2[i];
 
  242   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  246   std::cout << 
"Testing index_norm_inf..." << std::endl;
 
  248   std::size_t cpu_index = 0;
 
  250   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  251     if (std_v1[i] > cpu_result)
 
  253       cpu_result = std_v1[i];
 
  258   if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
 
  263   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  268   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  269     if (std_v1[i] + std_v2[i] > cpu_result)
 
  271       cpu_result = std_v1[i];
 
  276   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  280   std::cout << 
"Testing max..." << std::endl;
 
  281   cpu_result = std_v1[0];
 
  282   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  283     cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
 
  286   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  289   cpu_result = std_v1[0];
 
  290   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  291     cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
 
  292   gpu_result = cpu_result;
 
  296   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  299   cpu_result = std_v1[0] + std_v2[0];
 
  300   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  301     cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
 
  302   gpu_result = cpu_result;
 
  306   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  311   std::cout << 
"Testing min..." << std::endl;
 
  312   cpu_result = std_v1[0];
 
  313   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  314     cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
 
  317   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  320   cpu_result = std_v1[0];
 
  321   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  322     cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
 
  323   gpu_result = cpu_result;
 
  327   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  330   cpu_result = std_v1[0] + std_v2[0];
 
  331   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  332     cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
 
  333   gpu_result = cpu_result;
 
  337   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  341   std::cout << 
"Testing sum..." << std::endl;
 
  343   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  344     cpu_result += std_v1[i];
 
  348   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  350   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  354   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  355     cpu_result += std_v1[i] + std_v2[i];
 
  359   if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
 
  361   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  367   std::cout << 
"Testing assignments..." << std::endl;
 
  369   for (
size_t i=0; i < std_v1.size(); ++i)
 
  372   for (
size_t i=0; i < vcl_v1.size(); ++i)
 
  375   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  382   std::cout << 
"Testing scaling with CPU scalar..." << std::endl;
 
  386   for (
size_t i=0; i<std_v1.size(); ++i)
 
  390   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  393   std::cout << 
"Testing scaling with GPU scalar..." << std::endl;
 
  394   for (
size_t i=0; i<std_v1.size(); ++i)
 
  398   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  404   std::cout << 
"Testing shrinking with CPU scalar..." << std::endl;
 
  405   for (
size_t i=0; i<std_v1.size(); ++i)
 
  409   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  412   std::cout << 
"Testing shrinking with GPU scalar..." << std::endl;
 
  413   for (
size_t i=0; i<std_v1.size(); ++i)
 
  417   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  424   for (
size_t i=0; i<std_v1.size(); ++i)
 
  426   for (
size_t i=0; i<std_v1.size(); ++i)
 
  427     std_v2[i] = 3 * std_v1[i];
 
  431   std::cout << 
"Testing add on vector..." << std::endl;
 
  433   std::cout << 
"Checking for successful copy..." << std::endl;
 
  434   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  436   if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
 
  439   for (
size_t i=0; i<std_v1.size(); ++i)
 
  440     std_v1[i] = std_v1[i] + std_v2[i];
 
  441   vcl_v1 = vcl_v1 + vcl_v2;
 
  443   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  446   std::cout << 
"Testing inplace-add on vector..." << std::endl;
 
  447   for (
size_t i=0; i<std_v1.size(); ++i)
 
  448     std_v1[i] += std_v2[i];
 
  451   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  458   std::cout << 
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
 
  459   for (
size_t i=0; i < std_v1.size(); ++i)
 
  461   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  462     std_v2[i] = 3 * std_v1[i];
 
  466   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  467     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  468   vcl_v1 = vcl_v1 + alpha * vcl_v2;
 
  470   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  473   std::cout << 
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
 
  474   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  475     std_v2[i] = 3 * std_v1[i];
 
  479   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  480     std_v1[i] = alpha * std_v1[i] + std_v2[i];
 
  481   vcl_v1 = alpha * vcl_v1 + vcl_v2;
 
  483   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  486   std::cout << 
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
 
  487   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  488     std_v2[i] = 3 * std_v1[i];
 
  492   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  493     std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
 
  494   vcl_v1 = alpha * vcl_v1 + beta * vcl_v2;
 
  496   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  500   std::cout << 
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
 
  501   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  502     std_v2[i] = 3 * std_v1[i];
 
  506   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  507     std_v1[i] += alpha * std_v2[i];
 
  508   vcl_v1 += alpha * vcl_v2;
 
  510   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  514   std::cout << 
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
 
  515   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  516     std_v2[i] = 3 * std_v1[i];
 
  520   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  521     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  522   vcl_v1   = vcl_v1   + gpu_alpha *   vcl_v2;
 
  524   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  527   std::cout << 
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
 
  528   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  529     std_v2[i] = 3 * std_v1[i];
 
  533   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  534     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  535   vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
 
  537   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  540   std::cout << 
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
 
  541   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  542     std_v2[i] = 3 * std_v1[i];
 
  546   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  547     std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
 
  548   vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
 
  550   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  554   std::cout << 
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
 
  555   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  556     std_v2[i] = 3 * std_v1[i];
 
  560   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  561     std_v1[i] += alpha * std_v1[i] + beta * std_v2[i];
 
  562   vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
 
  564   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  568   std::cout << 
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
 
  569   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  570     std_v2[i] = 3 * std_v1[i];
 
  574   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  575     std_v1[i] += alpha * std_v2[i];
 
  576   vcl_v1 += gpu_alpha * vcl_v2;
 
  578   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  585   std::cout << 
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
 
  586   for (
size_t i=0; i < std_v1.size(); ++i)
 
  588   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  589     std_v2[i] = 3 * std_v1[i];
 
  593   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  594     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  595   vcl_v1 = vcl_v1 + vcl_v2 / alpha;
 
  597   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  601   std::cout << 
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
 
  602   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  603     std_v2[i] = 3 * std_v1[i];
 
  607   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  608     std_v1[i] = std_v1[i] / alpha + std_v2[i];
 
  609   vcl_v1 = vcl_v1 / alpha + vcl_v2;
 
  611   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  614   std::cout << 
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
 
  615   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  616     std_v2[i] = 3 * std_v1[i];
 
  620   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  621     std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
 
  622   vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta;
 
  624   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  627   std::cout << 
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
 
  628   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  629     std_v2[i] = 3 * std_v1[i];
 
  633   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  634     std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta;
 
  635   vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
 
  637   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  641   std::cout << 
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
 
  642   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  643     std_v2[i] = 3 * std_v1[i];
 
  647   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  648     std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta;
 
  649   vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
 
  651   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  656   std::cout << 
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
 
  657   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  658     std_v2[i] = 3 * std_v1[i];
 
  662   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  663     std_v1[i] += std_v2[i] / alpha;
 
  664   vcl_v1 += vcl_v2 / alpha;
 
  666   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  670   std::cout << 
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
 
  671   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  672     std_v2[i] = 3 * std_v1[i];
 
  676   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  677     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  678   vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
 
  680   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  683   std::cout << 
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
 
  684   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  685     std_v2[i] = 3 * std_v1[i];
 
  689   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  690     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  691   vcl_v1   = vcl_v1   +   vcl_v2 / gpu_alpha;
 
  693   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  696   std::cout << 
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
 
  697   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  698     std_v2[i] = 3 * std_v1[i];
 
  702   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  703     std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
 
  704   vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
 
  706   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  710   std::cout << 
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
 
  711   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  712     std_v2[i] = 3 * std_v1[i];
 
  716   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  717     std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta;
 
  718   vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
 
  720   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  723   std::cout << 
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
 
  724   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  725     std_v2[i] = 3 * std_v1[i];
 
  729   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  730     std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta;
 
  731   vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
 
  733   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  737   std::cout << 
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
 
  738   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  739     std_v2[i] = 3 * std_v1[i];
 
  743   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  744     std_v1[i] += std_v2[i] * alpha;
 
  745   vcl_v1 += vcl_v2 * gpu_alpha;
 
  747   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  753   for (
size_t i=0; i < std_v1.size(); ++i)
 
  755   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  756     std_v2[i] = 3 * std_v1[i];
 
  760   std::cout << 
"Testing three vector additions..." << std::endl;
 
  761   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  762     std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i];
 
  763   vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
 
  765   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  769   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  770     std_v2[i] = 3 * std_v1[i];
 
  774   std::cout << 
"Testing swap..." << std::endl;
 
  775   swap(std_v1, std_v2);
 
  776   swap(vcl_v1, vcl_v2);
 
  778   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  781   std::cout << 
"Testing elementwise multiplication..." << std::endl;
 
  782   std::cout << 
" v1 = element_prod(v1, v2);" << std::endl;
 
  783   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  784     std_v1[i] = std_v1[i] * std_v2[i];
 
  787   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  790   std::cout << 
" v1 += element_prod(v1, v2);" << std::endl;
 
  791   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  792     std_v1[i] += std_v1[i] * std_v2[i];
 
  795   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  799   std::cout << 
" v1 = element_prod(v1 + v2, v2);" << std::endl;
 
  800   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  801     std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i];
 
  804   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  807   std::cout << 
" v1 += element_prod(v1 + v2, v2);" << std::endl;
 
  808   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  809     std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i];
 
  812   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  816   std::cout << 
" v1 = element_prod(v1, v2 + v1);" << std::endl;
 
  817   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  818     std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]);
 
  821   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  824   std::cout << 
" v1 += element_prod(v1, v2 + v1);" << std::endl;
 
  825   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  826     std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]);
 
  829   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  833   std::cout << 
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
 
  834   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  835     std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
 
  838   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  841   std::cout << 
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
 
  842   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  843     std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
 
  846   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  850   std::cout << 
"Testing elementwise division..." << std::endl;
 
  851   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  860   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  861     std_v1[i] = std_v1[i] / std_v2[i];
 
  864   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  867   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  868     std_v1[i] += std_v1[i] / std_v2[i];
 
  871   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  875   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  876     std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i];
 
  879   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  882   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  883     std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i];
 
  886   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  890   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  891     std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]);
 
  894   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  897   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  898     std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]);
 
  901   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  905   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  906     std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
 
  909   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  912   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  913     std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
 
  916   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  924 template< 
typename NumericT >
 
  927   int retval = EXIT_SUCCESS;
 
  928   std::size_t 
size = 12345;
 
  930   std::cout << 
"Running tests for vector of size " << size << std::endl;
 
  935   std::vector<NumericT> std_full_vec(size);
 
  936   std::vector<NumericT> std_full_vec2(std_full_vec.size());
 
  938   for (std::size_t i=0; i<std_full_vec.size(); ++i)
 
  944   std::vector<NumericT> std_range_vec (2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
 
  945   std::vector<NumericT> std_range_vec2(2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
 
  947   for (std::size_t i=0; i<std_range_vec.size(); ++i)
 
  948     std_range_vec[i] = std_full_vec[i + std_full_vec.size() / 4];
 
  949   for (std::size_t i=0; i<std_range_vec2.size(); ++i)
 
  950     std_range_vec2[i] = std_full_vec2[i + 2 * std_full_vec2.size() / 4];
 
  952   std::vector<NumericT> std_slice_vec (std_full_vec.size() / 4);
 
  953   std::vector<NumericT> std_slice_vec2(std_full_vec.size() / 4);
 
  955   for (std::size_t i=0; i<std_slice_vec.size(); ++i)
 
  956     std_slice_vec[i] = std_full_vec[3*i + std_full_vec.size() / 4];
 
  957   for (std::size_t i=0; i<std_slice_vec2.size(); ++i)
 
  958     std_slice_vec2[i] = std_full_vec2[2*i + 2 * std_full_vec2.size() / 4];
 
  967   viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
 
  969   viennacl::range vcl_r1(    vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
 
  970   viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
 
  978     std::vector<NumericT> std_short_vec(std_range_vec);
 
  979     std::vector<NumericT> std_short_vec2(std_range_vec2);
 
  981     std::cout << 
"Testing creation of vectors from range..." << std::endl;
 
  982     if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
 
  984     if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
 
  988   viennacl::slice vcl_s1(    vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
 
  989   viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
 
  996   std::vector<NumericT> std_short_vec(std_slice_vec);
 
  997   std::vector<NumericT> std_short_vec2(std_slice_vec2);
 
  999   std::cout << 
"Testing creation of vectors from slice..." << std::endl;
 
 1000   if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
 
 1001     return EXIT_FAILURE;
 
 1002   if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
 
 1003     return EXIT_FAILURE;
 
 1010   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
 
 1011   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1012                           vcl_short_vec, vcl_short_vec2);
 
 1013   if (retval != EXIT_SUCCESS)
 
 1014     return EXIT_FAILURE;
 
 1016   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
 
 1017   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1018                           vcl_short_vec, vcl_range_vec2);
 
 1019   if (retval != EXIT_SUCCESS)
 
 1020     return EXIT_FAILURE;
 
 1022   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
 
 1023   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1024                           vcl_short_vec, vcl_slice_vec2);
 
 1025   if (retval != EXIT_SUCCESS)
 
 1026     return EXIT_FAILURE;
 
 1030   std::cout << 
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
 
 1031   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1032                           vcl_range_vec, vcl_short_vec2);
 
 1033   if (retval != EXIT_SUCCESS)
 
 1034     return EXIT_FAILURE;
 
 1036   std::cout << 
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
 
 1037   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1038                           vcl_range_vec, vcl_range_vec2);
 
 1039   if (retval != EXIT_SUCCESS)
 
 1040     return EXIT_FAILURE;
 
 1042   std::cout << 
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
 
 1043   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1044                           vcl_range_vec, vcl_slice_vec2);
 
 1045   if (retval != EXIT_SUCCESS)
 
 1046     return EXIT_FAILURE;
 
 1050   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
 
 1051   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1052                           vcl_slice_vec, vcl_short_vec2);
 
 1053   if (retval != EXIT_SUCCESS)
 
 1054     return EXIT_FAILURE;
 
 1056   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
 
 1057   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1058                           vcl_slice_vec, vcl_range_vec2);
 
 1059   if (retval != EXIT_SUCCESS)
 
 1060     return EXIT_FAILURE;
 
 1062   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
 
 1063   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1064                           vcl_slice_vec, vcl_slice_vec2);
 
 1065   if (retval != EXIT_SUCCESS)
 
 1066     return EXIT_FAILURE;
 
 1068   return EXIT_SUCCESS;
 
 1078   std::cout << std::endl;
 
 1079   std::cout << 
"----------------------------------------------" << std::endl;
 
 1080   std::cout << 
"----------------------------------------------" << std::endl;
 
 1081   std::cout << 
"## Test :: Vector with Integer types" << std::endl;
 
 1082   std::cout << 
"----------------------------------------------" << std::endl;
 
 1083   std::cout << 
"----------------------------------------------" << std::endl;
 
 1084   std::cout << std::endl;
 
 1086   int retval = EXIT_SUCCESS;
 
 1088   std::cout << std::endl;
 
 1089   std::cout << 
"----------------------------------------------" << std::endl;
 
 1090   std::cout << std::endl;
 
 1092     std::cout << 
"# Testing setup:" << std::endl;
 
 1093     std::cout << 
"  numeric: unsigned int" << std::endl;
 
 1094     retval = test<unsigned int>();
 
 1095     if ( retval == EXIT_SUCCESS )
 
 1096       std::cout << 
"# Test passed" << std::endl;
 
 1100   std::cout << std::endl;
 
 1101   std::cout << 
"----------------------------------------------" << std::endl;
 
 1102   std::cout << std::endl;
 
 1104     std::cout << 
"# Testing setup:" << std::endl;
 
 1105     std::cout << 
"  numeric: long" << std::endl;
 
 1106     retval = test<unsigned long>();
 
 1107     if ( retval == EXIT_SUCCESS )
 
 1108       std::cout << 
"# Test passed" << std::endl;
 
 1112   std::cout << std::endl;
 
 1113   std::cout << 
"----------------------------------------------" << std::endl;
 
 1114   std::cout << std::endl;
 
 1116   std::cout << std::endl;
 
 1117   std::cout << 
"------- Test completed --------" << std::endl;
 
 1118   std::cout << std::endl;
 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector. 
int check(T1 const &t1, T2 const &t2)
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations. 
int test(STLVectorType &std_v1, STLVectorType &std_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
Class for representing non-strided subvectors of a bigger vector x. 
Class for representing strided subvectors of a bigger vector x. 
Proxy classes for vectors. 
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied. 
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
Represents a vector consisting of 1 at a given index and zeros otherwise. 
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)