45 template<
typename ScalarType>
 
   54 template<
typename ScalarType>
 
   63 template<
typename ScalarType>
 
   // Fragment of a comparison helper templated on an element type (ScalarType)
   // and a device-side vector type (VCLVectorType). The function name, its
   // parameter list and its return statements are not part of this listing;
   // NOTE(review): presumably this is the vector overload of diff() that
   // check() calls -- confirm against the full file.
   72 template<
typename ScalarType, 
typename VCLVectorType>
 
   // Temporary std::vector with as many elements as v2. The statement that
   // fills it is not visible here (presumably a device-to-host copy of v2 --
   // TODO confirm).
   75    std::vector<ScalarType> v2_cpu(v2.size());
 
   // Walks every index of v1 using an unsigned int counter.
   79    for (
unsigned int i=0;i<v1.size(); ++i)
 
   // Element-wise exact (!=) comparison between the temporary copy and v1;
   // the action taken on a mismatch is not shown in this listing.
   81       if (v2_cpu[i] != v1[i])
 
   // check(t1, t2): compares two objects through diff() and reports a mismatch.
   //
   // t1, t2 -- the two operands handed unchanged to diff(t1, t2); they are
   //           taken by const reference.
   // Result -- an int status that starts as EXIT_SUCCESS and is set to
   //           EXIT_FAILURE when diff() is non-zero. The return statement and
   //           the braces are not part of this listing; presumably retval is
   //           returned -- TODO confirm.
   88 template<
typename T1, 
typename T2>
 
   89 int check(T1 
const & t1, T2 
const & t2)
 
   91   int retval = EXIT_SUCCESS;
 
   // Any non-zero result of diff() counts as a failure.
   93   if (
diff(t1, t2) != 0)
 
   // diff() is evaluated a second time here, only to print its magnitude.
   95     std::cout << 
"# Error! Difference: " << std::abs(
diff(t1, t2)) << std::endl;
 
   96     retval = EXIT_FAILURE;
 
  105 template< 
typename NumericT, 
typename STLVectorType, 
typename ViennaCLVectorType1, 
typename ViennaCLVectorType2 >
 
  106 int test(STLVectorType       & std_v1, STLVectorType       & std_v2,
 
  107          ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
 
  109   int retval = EXIT_SUCCESS;
 
  117   std::cout << 
"Checking for zero_vector initializer..." << std::endl;
 
  118   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  121   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  124   std::cout << 
"Checking for scalar_vector initializer..." << std::endl;
 
  125   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  126     std_v1[i] = cpu_result;
 
  128   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  131   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  132     std_v1[i] = cpu_result + 1;
 
  134   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  137   std::cout << 
"Checking for unit_vector initializer..." << std::endl;
 
  138   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  139     std_v1[i] = (i == 5) ? 1 : 0;
 
  141   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  144   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  153   std::cout << 
"Checking for successful copy..." << std::endl;
 
  154   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  156   if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
 
  164   std::cout << 
"Testing inner_prod..." << std::endl;
 
  166   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  167     cpu_result += std_v1[i] * std_v2[i];
 
  171   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  173   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  177   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  178     cpu_result += (std_v1[i] + std_v2[i]) * (std_v2[i] - std_v1[i]);
 
  182   if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
 
  184   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  189   std::cout << 
"Testing norm_1..." << std::endl;
 
  191   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  192     cpu_result += std::abs(std_v1[i]);
 
  195   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  199   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  200     cpu_result2 += std::abs(std_v1[i]);
 
  203   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  207   for (std::size_t i=0; i<std_v1.size(); ++i)   
 
  208     cpu_result2 += std::abs(std_v1[i] + std_v2[i]);
 
  211   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  215   std::cout << 
"Testing norm_inf..." << std::endl;
 
  217   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  218     if (std::abs(std_v1[i]) > cpu_result)
 
  219       cpu_result = std::abs(std_v1[i]);
 
  222   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  225   cpu_result2 = cpu_result;
 
  229   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  233   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  234     if (std_v1[i] + std_v2[i] > cpu_result2)
 
  235       cpu_result2 = std::abs(std_v1[i] + std_v2[i]);
 
  238   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  242   std::cout << 
"Testing index_norm_inf..." << std::endl;
 
  244   std::size_t cpu_index = 0;
 
  246   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  247     if (std::abs(std_v1[i]) > cpu_result)
 
  249       cpu_result = std::abs(std_v1[i]);
 
  254   if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index)) != EXIT_SUCCESS)
 
  259   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  264   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  265     if (std::abs(std_v1[i] + std_v2[i]) > cpu_result)
 
  267       cpu_result = std::abs(std_v1[i] + std_v2[i]);
 
  270   cpu_result = std_v1[cpu_index];
 
  273   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  277   std::cout << 
"Testing max..." << std::endl;
 
  278   cpu_result = std_v1[0];
 
  279   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  280     cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
 
  283   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  286   cpu_result = std_v1[0];
 
  287   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  288     cpu_result = std::max<NumericT>(cpu_result, std_v1[i]);
 
  289   gpu_result = cpu_result;
 
  293   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  296   cpu_result = std_v1[0] + std_v2[0];
 
  297   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  298     cpu_result = std::max<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
 
  299   gpu_result = cpu_result;
 
  303   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  308   std::cout << 
"Testing min..." << std::endl;
 
  309   cpu_result = std_v1[0];
 
  310   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  311     cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
 
  314   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  317   cpu_result = std_v1[0];
 
  318   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  319     cpu_result = std::min<NumericT>(cpu_result, std_v1[i]);
 
  320   gpu_result = cpu_result;
 
  324   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  327   cpu_result = std_v1[0] + std_v2[0];
 
  328   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  329     cpu_result = std::min<NumericT>(cpu_result, std_v1[i] + std_v2[i]);
 
  330   gpu_result = cpu_result;
 
  334   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  338   std::cout << 
"Testing sum..." << std::endl;
 
  340   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  341     cpu_result += std_v1[i];
 
  345   if (
check(cpu_result, cpu_result2) != EXIT_SUCCESS)
 
  347   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  351   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  352     cpu_result += std_v1[i] + std_v2[i];
 
  356   if (
check(cpu_result, cpu_result3) != EXIT_SUCCESS)
 
  358   if (
check(cpu_result, gpu_result) != EXIT_SUCCESS)
 
  368   std::vector<NumericT> x = std_v1;
 
  369   std::vector<NumericT> y = std_v2;
 
  370   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  380   if (
check(x, vcl_v1) != EXIT_SUCCESS)
 
  382   if (
check(y, vcl_v2) != EXIT_SUCCESS)
 
  387   std::cout << 
"Testing assignments..." << std::endl;
 
  389   for (
size_t i=0; i < std_v1.size(); ++i)
 
  392   for (
size_t i=0; i < vcl_v1.size(); ++i)
 
  395   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  402   std::cout << 
"Testing scaling with CPU scalar..." << std::endl;
 
  406   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  410   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  413   std::cout << 
"Testing scaling with GPU scalar..." << std::endl;
 
  414   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  418   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  424   std::cout << 
"Testing shrinking with CPU scalar..." << std::endl;
 
  425   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  429   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  432   std::cout << 
"Testing shrinking with GPU scalar..." << std::endl;
 
  433   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  437   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  444   for (
size_t i=0; i < std_v1.size(); ++i)
 
  446   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  447     std_v2[i] = 3 * std_v1[i];
 
  451   std::cout << 
"Testing add on vector..." << std::endl;
 
  453   std::cout << 
"Checking for successful copy..." << std::endl;
 
  454   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  456   if (
check(std_v2, vcl_v2) != EXIT_SUCCESS)
 
  459   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  460     std_v1[i] = std_v1[i] + std_v2[i];
 
  461   vcl_v1 = vcl_v1 + vcl_v2;
 
  463   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  466   std::cout << 
"Testing add on vector with flipsign..." << std::endl;
 
  467   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  468     std_v1[i] = - std_v1[i] + std_v2[i];
 
  469   vcl_v1 = - vcl_v1 + vcl_v2;
 
  471   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  474   std::cout << 
"Testing inplace-add on vector..." << std::endl;
 
  475   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  476     std_v1[i] += std_v2[i];
 
  479   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  485   std::cout << 
"Testing sub on vector..." << std::endl;
 
  486   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  487     std_v2[i] = 3 * std_v1[i];
 
  491   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  492     std_v1[i] = std_v1[i] - std_v2[i];
 
  493   vcl_v1 = vcl_v1 - vcl_v2;
 
  495   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  498   std::cout << 
"Testing inplace-sub on vector..." << std::endl;
 
  499   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  500     std_v1[i] -= std_v2[i];
 
  503   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  511   std::cout << 
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
 
  512   for (
size_t i=0; i < std_v1.size(); ++i)
 
  514   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  515     std_v2[i] = 3 * std_v1[i];
 
  519   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  520     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  521   vcl_v1 = vcl_v1 + alpha * vcl_v2;
 
  523   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  526   std::cout << 
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
 
  527   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  528     std_v2[i] = 3 * std_v1[i];
 
  532   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  533     std_v1[i] = alpha * std_v1[i] + std_v2[i];
 
  534   vcl_v1 = alpha * vcl_v1 + vcl_v2;
 
  536   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  539   std::cout << 
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
 
  540   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  541     std_v2[i] = 3 * std_v1[i];
 
  545   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  546     std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
 
  547   vcl_v1 = alpha * vcl_v1 + beta * vcl_v2;
 
  549   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  553   std::cout << 
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
 
  554   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  555     std_v2[i] = 3 * std_v1[i];
 
  559   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  560     std_v1[i] += alpha * std_v2[i];
 
  561   vcl_v1 += alpha * vcl_v2;
 
  563   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  567   std::cout << 
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
 
  568   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  569     std_v2[i] = 3 * std_v1[i];
 
  573   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  574     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  575   vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
 
  577   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  580   std::cout << 
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
 
  581   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  582     std_v2[i] = 3 * std_v1[i];
 
  586   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  587     std_v1[i] = std_v1[i] + alpha * std_v2[i];
 
  588   vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
 
  590   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  593   std::cout << 
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
 
  594   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  595     std_v2[i] = 3 * std_v1[i];
 
  599   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  600     std_v1[i] = alpha * std_v1[i] + beta * std_v2[i];
 
  601   vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
 
  603   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  607   std::cout << 
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
 
  608   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  609     std_v2[i] = 3 * std_v1[i];
 
  613   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  614     std_v1[i] += alpha * std_v1[i] + beta * std_v2[i];
 
  615   vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
 
  617   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  620   std::cout << 
"Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
 
  621   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  622     std_v2[i] = 3 * std_v1[i];
 
  626   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  627     std_v1[i] += alpha * std_v1[i] - beta * std_v2[i];
 
  628   vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
 
  630   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  635   std::cout << 
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
 
  636   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  637     std_v2[i] = 3 * std_v1[i];
 
  641   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  642     std_v1[i] += alpha * std_v2[i];
 
  643   vcl_v1 += gpu_alpha * vcl_v2;
 
  645   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  652   std::cout << 
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
 
  653   for (
size_t i=0; i < std_v1.size(); ++i)
 
  655   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  656     std_v2[i] = 3 * std_v1[i];
 
  660   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  661     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  662   vcl_v1 = vcl_v1 + vcl_v2 / alpha;
 
  664   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  668   std::cout << 
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
 
  669   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  670     std_v2[i] = 3 * std_v1[i];
 
  674   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  675     std_v1[i] = std_v1[i] / alpha + std_v2[i];
 
  676   vcl_v1 = vcl_v1 / alpha + vcl_v2;
 
  678   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  681   std::cout << 
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
 
  682   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  683     std_v2[i] = 3 * std_v1[i];
 
  687   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  688     std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
 
  689   vcl_v1 = vcl_v1 / alpha + vcl_v2 / beta;
 
  691   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  694   std::cout << 
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
 
  695   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  696     std_v2[i] = 3 * std_v1[i];
 
  700   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  701     std_v1[i] = std_v1[i] / alpha + std_v2[i] * beta;
 
  702   vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
 
  704   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  708   std::cout << 
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
 
  709   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  710     std_v2[i] = 3 * std_v1[i];
 
  714   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  715     std_v1[i] = std_v1[i] * alpha + std_v2[i] / beta;
 
  716   vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
 
  718   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  723   std::cout << 
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
 
  724   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  725     std_v2[i] = 3 * std_v1[i];
 
  729   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  730     std_v1[i] += std_v2[i] / alpha;
 
  731   vcl_v1   += vcl_v2 / alpha;
 
  733   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  737   std::cout << 
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
 
  738   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  739     std_v2[i] = 3 * std_v1[i];
 
  743   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  744     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  745   vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
 
  747   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  750   std::cout << 
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
 
  751   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  752     std_v2[i] = 3 * std_v1[i];
 
  756   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  757     std_v1[i] = std_v1[i] + std_v2[i] / alpha;
 
  758   vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
 
  760   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  763   std::cout << 
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
 
  764   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  765     std_v2[i] = 3 * std_v1[i];
 
  769   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  770     std_v1[i] = std_v1[i] / alpha + std_v2[i] / beta;
 
  771   vcl_v1   =   vcl_v1 / gpu_alpha +   vcl_v2 / gpu_beta;
 
  773   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  777   std::cout << 
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
 
  778   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  779     std_v2[i] = 3 * std_v1[i];
 
  783   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  784     std_v1[i] += std_v1[i] / alpha + std_v2[i] / beta;
 
  785   vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
 
  787   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  790   std::cout << 
"Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
 
  791   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  792     std_v2[i] = 3 * std_v1[i];
 
  796   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  797     std_v1[i] += std_v1[i] / alpha - std_v2[i] / beta;
 
  798   vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
 
  800   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  803   std::cout << 
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
 
  804   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  805     std_v2[i] = 3 * std_v1[i];
 
  809   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  810     std_v1[i] += std_v1[i] / alpha + std_v2[i] * beta;
 
  811   vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
 
  813   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  816   std::cout << 
"Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
 
  817   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  818     std_v2[i] = 3 * std_v1[i];
 
  822   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  823     std_v1[i] += std_v1[i] * alpha - std_v2[i] / beta;
 
  824   vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
 
  826   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  831   std::cout << 
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
 
  832   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  833     std_v2[i] = 3 * std_v1[i];
 
  837   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  838     std_v1[i] += std_v2[i] * alpha;
 
  839   vcl_v1 += vcl_v2 * gpu_alpha;
 
  841   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  847   std::cout << 
"Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
 
  848   for (
size_t i=0; i < std_v1.size(); ++i)
 
  850   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  851     std_v2[i] = 3 * std_v1[i];
 
  855   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  856     std_v1[i] = std_v1[i] - alpha * std_v2[i];
 
  857   vcl_v1 = vcl_v1 - alpha * vcl_v2;
 
  859   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  863   std::cout << 
"Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
 
  864   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  865     std_v2[i] = 3 * std_v1[i];
 
  869   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  870     std_v1[i] = alpha * std_v1[i] - std_v2[i];
 
  871   vcl_v1 = alpha * vcl_v1 - vcl_v2;
 
  873   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  876   std::cout << 
"Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
 
  877   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  878     std_v2[i] = 3 * std_v1[i];
 
  882   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  883     std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
 
  884   vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
 
  886   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  890   std::cout << 
"Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
 
  891   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  892     std_v2[i] = 3 * std_v1[i];
 
  896   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  897     std_v1[i] -= alpha * std_v2[i];
 
  898   vcl_v1 -= alpha * vcl_v2;
 
  900   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  904   std::cout << 
"Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
 
  905   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  906     std_v2[i] = 3 * std_v1[i];
 
  910   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  911     std_v1[i] = std_v1[i] - alpha * std_v2[i];
 
  912   vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
 
  914   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  917   std::cout << 
"Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
 
  918   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  919     std_v2[i] = 3 * std_v1[i];
 
  923   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  924     std_v1[i] = std_v1[i] - alpha * std_v2[i];
 
  925   vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
 
  927   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  930   std::cout << 
"Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
 
  931   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  932     std_v2[i] = 3 * std_v1[i];
 
  936   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  937     std_v1[i] = alpha * std_v1[i] - beta * std_v2[i];
 
  938   vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
 
  940   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  943   std::cout << 
"Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
 
  944   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  945     std_v2[i] = 3 * std_v1[i];
 
  949   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  950     std_v1[i] -= alpha * std_v1[i] + beta * std_v2[i];
 
  951   vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
 
  953   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  956   std::cout << 
"Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
 
  957   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  958     std_v2[i] = 3 * std_v1[i];
 
  962   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  963     std_v1[i] -= alpha * std_v1[i] - beta * std_v2[i];
 
  964   vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
 
  966   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  970   std::cout << 
"Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
 
  971   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  972     std_v2[i] = 3 * std_v1[i];
 
  976   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  977     std_v1[i] -= alpha * std_v2[i];
 
  978   vcl_v1 -= gpu_alpha * vcl_v2;
 
  980   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
  988   std::cout << 
"Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
 
  989   for (
size_t i=0; i < std_v1.size(); ++i)
 
  991   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  992     std_v2[i] = 3 * std_v1[i];
 
  996   for (std::size_t i=0; i<std_v1.size(); ++i)
 
  997     std_v1[i] = std_v1[i] - std_v2[i] / alpha;
 
  998   vcl_v1 = vcl_v1 - vcl_v2 / alpha;
 
 1000   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1001     return EXIT_FAILURE;
 
 1004   std::cout << 
"Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
 
 1005   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1006     std_v2[i] = 3 * std_v1[i];
 
 1010   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1011     std_v1[i] = std_v1[i] / alpha - std_v2[i];
 
 1012   vcl_v1 = vcl_v1 / alpha - vcl_v2;
 
 1014   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1015     return EXIT_FAILURE;
 
 1017   std::cout << 
"Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
 
 1018   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1019     std_v2[i] = 3 * std_v1[i];
 
 1023   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1024     std_v1[i] = std_v1[i] / alpha - std_v2[i] / alpha;
 
 1025   vcl_v1   =   vcl_v1 / alpha -   vcl_v2 / alpha;
 
 1027   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1028     return EXIT_FAILURE;
 
 1031   std::cout << 
"Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
 
 1032   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1033     std_v2[i] = 3 * std_v1[i];
 
 1037   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1038     std_v1[i] -= std_v2[i] / alpha;
 
 1039   vcl_v1 -= vcl_v2 / alpha;
 
 1041   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1042     return EXIT_FAILURE;
 
 1044   std::cout << 
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
 
 1045   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1046     std_v2[i] = 3 * std_v1[i];
 
 1050   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1051     std_v1[i] -= std_v2[i] / alpha;
 
 1052   vcl_v1   -=   vcl_v2 / gpu_alpha;
 
 1054   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1055     return EXIT_FAILURE;
 
 1058   std::cout << 
"Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
 
 1059   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1060     std_v2[i] = 3 * std_v1[i];
 
 1064   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1065     std_v1[i] = std_v1[i] - std_v2[i] / alpha;
 
 1066   vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
 
 1068   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1069     return EXIT_FAILURE;
 
 1071   std::cout << 
"Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
 
 1072   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1073     std_v2[i] = 3 * std_v1[i];
 
 1077   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1078     std_v1[i] = std_v1[i] - std_v2[i] / alpha;
 
 1079   vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
 
 1081   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1082     return EXIT_FAILURE;
 
 1084   std::cout << 
"Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
 
 1085   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1086     std_v2[i] = 3 * std_v1[i];
 
 1090   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1091     std_v1[i] = std_v1[i] / alpha - std_v2[i] / beta;
 
 1092   vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
 
 1094   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1095     return EXIT_FAILURE;
 
 1097   std::cout << 
"Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
 
 1098   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1099     std_v2[i] = 3 * std_v1[i];
 
 1103   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1104     std_v1[i] -= std_v1[i] / alpha + std_v2[i] / beta;
 
 1105   vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
 
 1107   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1108     return EXIT_FAILURE;
 
 1110   std::cout << 
"Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
 
 1111   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1112     std_v2[i] = 3 * std_v1[i];
 
 1116   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1117     std_v1[i] -= std_v1[i] / alpha - std_v2[i] / beta;
 
 1118   vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
 
 1120   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1121     return EXIT_FAILURE;
 
 1123   std::cout << 
"Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
 
 1124   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1125     std_v2[i] = 3 * std_v1[i];
 
 1129   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1130     std_v1[i] = std_v1[i] * alpha - std_v2[i] / beta;
 
 1131   vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
 
 1133   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1134     return EXIT_FAILURE;
 
 1136   std::cout << 
"Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
 
 1137   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1138     std_v2[i] = 3 * std_v1[i];
 
 1142   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1143     std_v1[i] = std_v1[i] / alpha - std_v2[i] * beta;
 
 1144   vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
 
 1146   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1147     return EXIT_FAILURE;
 
 1149   std::cout << 
"Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
 
 1150   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1151     std_v2[i] = 3 * std_v1[i];
 
 1155   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1156     std_v1[i] -= std_v1[i] * alpha + std_v2[i] / beta;
 
 1157   vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
 
 1159   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1160     return EXIT_FAILURE;
 
 1162   std::cout << 
"Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
 
 1163   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1164     std_v2[i] = 3 * std_v1[i];
 
 1168   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1169     std_v1[i] -= std_v1[i] / alpha + std_v2[i] * beta;
 
 1170   vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
 
 1172   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1173     return EXIT_FAILURE;
 
 1175   std::cout << 
"Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
 
 1176   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1177     std_v2[i] = 3 * std_v1[i];
 
 1181   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1182     std_v1[i] -= std_v1[i] * alpha - std_v2[i] / beta;
 
 1183   vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
 
 1185   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1186     return EXIT_FAILURE;
 
 1188   std::cout << 
"Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
 
 1189   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1190     std_v2[i] = 3 * std_v1[i];
 
 1194   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1195     std_v1[i] -= std_v1[i] / alpha - std_v2[i] * beta;
 
 1196   vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
 
 1198   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1199     return EXIT_FAILURE;
 
 1202   std::cout << 
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
 
 1203   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1204     std_v2[i] = 3 * std_v1[i];
 
 1208   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1209     std_v1[i] -= alpha * std_v2[i];
 
 1210   vcl_v1 -= gpu_alpha * vcl_v2;
 
 1212   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1213     return EXIT_FAILURE;
 
 1220   for (
size_t i=0; i < std_v1.size(); ++i)
 
 1222   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1223     std_v2[i] = 3 * std_v1[i];
 
 1227   std::cout << 
"Testing three vector additions..." << std::endl;
 
 1228   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1229     std_v1[i] = std_v2[i] + std_v1[i] + std_v2[i];
 
 1230   vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
 
 1232   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1233     return EXIT_FAILURE;
 
 1236   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1237     std_v2[i] = 3 * std_v1[i];
 
 1241   std::cout << 
"Testing complicated vector expression with CPU scalar..." << std::endl;
 
 1242   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1243     std_v1[i] = beta * (std_v1[i] - alpha * std_v2[i]);
 
 1244   vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
 
 1246   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1247     return EXIT_FAILURE;
 
 1249   std::cout << 
"Testing complicated vector expression with GPU scalar..." << std::endl;
 
 1250   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1251     std_v1[i] = beta * (std_v1[i] -  alpha * std_v2[i]);
 
 1252   vcl_v1  = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
 
 1254   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1255     return EXIT_FAILURE;
 
 1258   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1259     std_v2[i] = 3 * std_v1[i];
 
 1263   std::cout << 
"Testing swap..." << std::endl;
 
 1264   swap(std_v1, std_v2);
 
 1265   swap(vcl_v1, vcl_v2);
 
 1267   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1268     return EXIT_FAILURE;
 
 1271   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1280   std::cout << 
"Testing unary operator-..." << std::endl;
 
 1281   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1282     std_v1[i] = -std_v2[i];
 
 1285   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1286     return EXIT_FAILURE;
 
 1289   std::cout << 
"Testing elementwise multiplication..." << std::endl;
 
 1290   std::cout << 
" v1 = element_prod(v1, v2);" << std::endl;
 
 1291   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1292     std_v1[i] = std_v1[i] * std_v2[i];
 
 1295   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1296     return EXIT_FAILURE;
 
 1298   std::cout << 
" v1 += element_prod(v1, v2);" << std::endl;
 
 1299   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1300     std_v1[i] += std_v1[i] * std_v2[i];
 
 1303   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1304     return EXIT_FAILURE;
 
 1306   std::cout << 
" v1 -= element_prod(v1, v2);" << std::endl;
 
 1307   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1308     std_v1[i] -= std_v1[i] * std_v2[i];
 
 1311   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1312     return EXIT_FAILURE;
 
 1315   std::cout << 
" v1 = element_prod(v1 + v2, v2);" << std::endl;
 
 1316   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1317     std_v1[i] = (std_v1[i] + std_v2[i]) * std_v2[i];
 
 1320   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1321     return EXIT_FAILURE;
 
 1323   std::cout << 
" v1 += element_prod(v1 + v2, v2);" << std::endl;
 
 1324   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1325     std_v1[i] += (std_v1[i] + std_v2[i]) * std_v2[i];
 
 1328   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1329     return EXIT_FAILURE;
 
 1331   std::cout << 
" v1 -= element_prod(v1 + v2, v2);" << std::endl;
 
 1332   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1333     std_v1[i] -= (std_v1[i] + std_v2[i]) * std_v2[i];
 
 1336   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1337     return EXIT_FAILURE;
 
 1340   std::cout << 
" v1 = element_prod(v1, v2 + v1);" << std::endl;
 
 1341   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1342     std_v1[i] = std_v1[i] * (std_v2[i] + std_v1[i]);
 
 1345   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1346     return EXIT_FAILURE;
 
 1348   std::cout << 
" v1 += element_prod(v1, v2 + v1);" << std::endl;
 
 1349   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1350     std_v1[i] += std_v1[i] * (std_v2[i] + std_v1[i]);
 
 1353   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1354     return EXIT_FAILURE;
 
 1356   std::cout << 
" v1 -= element_prod(v1, v2 + v1);" << std::endl;
 
 1357   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1358     std_v1[i] -= std_v1[i] * (std_v2[i] + std_v1[i]);
 
 1361   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1362     return EXIT_FAILURE;
 
 1365   std::cout << 
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
 
 1366   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1367     std_v1[i] = (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
 
 1370   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1371     return EXIT_FAILURE;
 
 1373   std::cout << 
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
 
 1374   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1375     std_v1[i] += (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
 
 1378   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1379     return EXIT_FAILURE;
 
 1381   std::cout << 
" v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
 
 1382   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1383     std_v1[i] -= (std_v1[i] + std_v2[i]) * (std_v2[i] + std_v1[i]);
 
 1386   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1387     return EXIT_FAILURE;
 
 1390   std::cout << 
"Testing elementwise division..." << std::endl;
 
 1391   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1400   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1401     std_v1[i] = std_v1[i] / std_v2[i];
 
 1404   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1405     return EXIT_FAILURE;
 
 1407   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1408     std_v1[i] += std_v1[i] / std_v2[i];
 
 1411   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1412     return EXIT_FAILURE;
 
 1414   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1415     std_v1[i] -= std_v1[i] / std_v2[i];
 
 1418   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1419     return EXIT_FAILURE;
 
 1422   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1423     std_v1[i] = (std_v1[i] + std_v2[i]) / std_v2[i];
 
 1426   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1427     return EXIT_FAILURE;
 
 1429   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1430     std_v1[i] += (std_v1[i] + std_v2[i]) / std_v2[i];
 
 1433   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1434     return EXIT_FAILURE;
 
 1436   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1437     std_v1[i] -= (std_v1[i] + std_v2[i]) / std_v2[i];
 
 1440   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1441     return EXIT_FAILURE;
 
 1444   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1445     std_v1[i] = std_v1[i] / (std_v2[i] + std_v1[i]);
 
 1448   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1449     return EXIT_FAILURE;
 
 1451   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1452     std_v1[i] += std_v1[i] / (std_v2[i] + std_v1[i]);
 
 1455   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1456     return EXIT_FAILURE;
 
 1458   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1459     std_v1[i] -= std_v1[i] / (std_v2[i] + std_v1[i]);
 
 1462   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1463     return EXIT_FAILURE;
 
 1466   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1467     std_v1[i] = (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
 
 1470   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1471     return EXIT_FAILURE;
 
 1473   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1474     std_v1[i] += (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
 
 1477   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1478     return EXIT_FAILURE;
 
 1480   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1481     std_v1[i] -= (std_v1[i] + std_v2[i]) / (std_v2[i] + std_v1[i]);
 
 1484   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1485     return EXIT_FAILURE;
 
 1487   std::cout << 
"Testing unary elementwise operations..." << std::endl;
 
 // GENERATE_UNARY_OP_TEST(FUNCNAME)
 //
 // Expands to a self-checking test sequence for one unary elementwise function.
 // The expansion relies on std_v1, std_v2, vcl_v1, vcl_v2 and check() being in
 // scope, and it executes `return EXIT_FAILURE` from the enclosing function on
 // the first mismatch.
 //
 // Steps performed for a given FUNCNAME:
 //   1. std_v2 = 3 * std_v1, then both host vectors are copied to vcl_v1/vcl_v2.
 //   2. v1  = FUNCNAME(v2)
 //   3. v1  = FUNCNAME(v1 + v2)
 //   4. v1 += FUNCNAME(v1)
 //   5. v1 += FUNCNAME(v1 + v2)
 //   6. v1 -= FUNCNAME(v2)
 //   7. v1 -= FUNCNAME(v1 + v2)
 // Each step is computed on the host with std::FUNCNAME and on the device with
 // viennacl::linalg::element_FUNCNAME, then compared via check(). The failure
 // message printed for a step names the expression that step actually evaluated.
 1489 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \ 
 1490   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1491   std_v2[i] = 3 * std_v1[i]; \ 
 1492   viennacl::copy(std_v1.begin(), std_v1.end(), vcl_v1.begin()); \ 
 1493   viennacl::copy(std_v2.begin(), std_v2.end(), vcl_v2.begin()); \ 
 1495   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1496     std_v1[i] = std::FUNCNAME(std_v2[i]); \ 
 1497   vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \ 
 1499   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1501     std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \ 
 1502     return EXIT_FAILURE; \ 
 1505   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1506     std_v1[i] = std::FUNCNAME(std_v1[i] + std_v2[i]); \ 
 1507   vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \ 
 1509   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1511     std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \ 
 1512     return EXIT_FAILURE; \ 
 1515   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1516     std_v1[i] += std::FUNCNAME(std_v1[i]); \ 
 1517   vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \ 
 1519   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1521     std::cout << "Failure at v1 += " << #FUNCNAME << "(v1)" << std::endl; \ 
 1522     return EXIT_FAILURE; \ 
 1525   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1526     std_v1[i] += std::FUNCNAME(std_v1[i] + std_v2[i]); \ 
 1527   vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \ 
 1529   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1531     std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \ 
 1532     return EXIT_FAILURE; \ 
 1535   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1536     std_v1[i] -= std::FUNCNAME(std_v2[i]); \ 
 1537   vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \ 
 1539   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1541     std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \ 
 1542     return EXIT_FAILURE; \ 
 1545   for (std::size_t i=0; i<std_v1.size(); ++i) \ 
 1546     std_v1[i] -= std::FUNCNAME(std_v1[i] + std_v2[i]); \ 
 1547   vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \ 
 1549   if (check(std_v1, vcl_v1) != EXIT_SUCCESS) \ 
 1551     std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \ 
 1552     return EXIT_FAILURE; \ 
 1570   std::cout << 
"Testing lenghty sum of scaled vectors..." << std::endl;
 
 1571   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1572     std_v2[i] = 3 * std_v1[i];
 
 1576   for (std::size_t i=0; i<std_v1.size(); ++i)
 
 1577     std_v1[i] = std_v2[i] / alpha + beta * std_v1[i] - alpha * std_v2[i] + beta * std_v1[i] - alpha * std_v1[i];
 
 1578   vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
 
 1580   if (
check(std_v1, vcl_v1) != EXIT_SUCCESS)
 
 1581     return EXIT_FAILURE;
 
 // Driver for one numeric type: builds host reference data and runs the
 // four-argument test<NumericT>(...) for every pairing of {vector, range, slice}
 // as first and second device operand. Returns EXIT_FAILURE as soon as a creation
 // check or a pairing fails, EXIT_SUCCESS otherwise.
 // NOTE(review): the signature line, braces and the declarations of the
 // vcl_* objects are not part of this listing; comments below describe only
 // the statements that are visible.
 1588 template< 
typename NumericT >
 
 1591   int retval = EXIT_SUCCESS;
 
 1592   std::size_t 
size = 12345;
 
 1594   std::cout << 
"Running tests for vector of size " << size << std::endl;
 
 // Full-length host vectors; the sub-vectors below are carved out of these.
 1599   std::vector<NumericT> std_full_vec(size);
 
 1600   std::vector<NumericT> std_full_vec2(std_full_vec.size());
 
 1602   for (std::size_t i=0; i<std_full_vec.size(); ++i)
 
 // Host copies of the contiguous sub-ranges: the first starts at offset size/4
 // of std_full_vec, the second at offset 2*size/4 of std_full_vec2. Both hold
 // (2*size/4 - size/4) elements.
 1608   std::vector<NumericT> std_range_vec (2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
 
 1609   std::vector<NumericT> std_range_vec2(2 * std_full_vec.size() / 4 - std_full_vec.size() / 4);
 
 1611   for (std::size_t i=0; i<std_range_vec.size(); ++i)
 
 1612     std_range_vec[i] = std_full_vec[i + std_full_vec.size() / 4];
 
 1613   for (std::size_t i=0; i<std_range_vec2.size(); ++i)
 
 1614     std_range_vec2[i] = std_full_vec2[i + 2 * std_full_vec2.size() / 4];
 
 // Host copies of the strided sub-vectors: size/4 elements each, taken with
 // stride 3 from offset size/4 and with stride 2 from offset 2*size/4.
 1616   std::vector<NumericT> std_slice_vec (std_full_vec.size() / 4);
 
 1617   std::vector<NumericT> std_slice_vec2(std_full_vec.size() / 4);
 
 1619   for (std::size_t i=0; i<std_slice_vec.size(); ++i)
 
 1620     std_slice_vec[i] = std_full_vec[3*i + std_full_vec.size() / 4];
 
 1621   for (std::size_t i=0; i<std_slice_vec2.size(); ++i)
 
 1622     std_slice_vec2[i] = std_full_vec2[2*i + 2 * std_full_vec2.size() / 4];
 
 // Transfer the second full host vector into its ViennaCL counterpart.
 1631   viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
 
 // Device-side ranges use the same offsets as the host range copies above.
 1633   viennacl::range vcl_r1(    vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
 
 1634   viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
 
 // Verify that vectors created from the ranges match the host range copies.
 1642     std::vector<NumericT> std_short_vec(std_range_vec);
 
 1643     std::vector<NumericT> std_short_vec2(std_range_vec2);
 
 1645     std::cout << 
"Testing creation of vectors from range..." << std::endl;
 
 1646     if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
 
 1647       return EXIT_FAILURE;
 
 1648     if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
 
 1649       return EXIT_FAILURE;
 
 // Device-side slices; the three arguments line up with the host slice copies
 // above as (start offset, stride, element count) - presumably the constructor's
 // meaning, confirm against the viennacl::slice documentation.
 1652   viennacl::slice vcl_s1(    vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
 
 1653   viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
 
 // Verify that vectors created from the slices match the host slice copies.
 1660   std::vector<NumericT> std_short_vec(std_slice_vec);
 
 1661   std::vector<NumericT> std_short_vec2(std_slice_vec2);
 
 1663   std::cout << 
"Testing creation of vectors from slice..." << std::endl;
 
 1664   if (
check(std_short_vec, vcl_short_vec) != EXIT_SUCCESS)
 
 1665     return EXIT_FAILURE;
 
 1666   if (
check(std_short_vec2, vcl_short_vec2) != EXIT_SUCCESS)
 
 1667     return EXIT_FAILURE;
 
 // Run the operation tests for all nine operand pairings. The host reference
 // vectors are the same for every pairing; only the device operand kinds change.
 1674   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
 
 1675   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1676                           vcl_short_vec, vcl_short_vec2);
 
 1677   if (retval != EXIT_SUCCESS)
 
 1678     return EXIT_FAILURE;
 
 1680   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
 
 1681   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1682                           vcl_short_vec, vcl_range_vec2);
 
 1683   if (retval != EXIT_SUCCESS)
 
 1684     return EXIT_FAILURE;
 
 1686   std::cout << 
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
 
 1687   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1688                           vcl_short_vec, vcl_slice_vec2);
 
 1689   if (retval != EXIT_SUCCESS)
 
 1690     return EXIT_FAILURE;
 
 1694   std::cout << 
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
 
 1695   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1696                           vcl_range_vec, vcl_short_vec2);
 
 1697   if (retval != EXIT_SUCCESS)
 
 1698     return EXIT_FAILURE;
 
 1700   std::cout << 
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
 
 1701   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1702                           vcl_range_vec, vcl_range_vec2);
 
 1703   if (retval != EXIT_SUCCESS)
 
 1704     return EXIT_FAILURE;
 
 1706   std::cout << 
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
 
 1707   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1708                           vcl_range_vec, vcl_slice_vec2);
 
 1709   if (retval != EXIT_SUCCESS)
 
 1710     return EXIT_FAILURE;
 
 1714   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
 
 1715   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1716                           vcl_slice_vec, vcl_short_vec2);
 
 1717   if (retval != EXIT_SUCCESS)
 
 1718     return EXIT_FAILURE;
 
 1720   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
 
 1721   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1722                           vcl_slice_vec, vcl_range_vec2);
 
 1723   if (retval != EXIT_SUCCESS)
 
 1724     return EXIT_FAILURE;
 
 1726   std::cout << 
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
 
 1727   retval = test<NumericT>(std_short_vec, std_short_vec2,
 
 1728                           vcl_slice_vec, vcl_slice_vec2);
 
 1729   if (retval != EXIT_SUCCESS)
 
 1730     return EXIT_FAILURE;
 
 1732   return EXIT_SUCCESS;
 
 // Visible statements of the program entry point (presumably main(); its
 // signature line and braces are not part of this listing). Prints a banner,
 // runs the test driver for int and for long, and reports each passing run.
 // NOTE(review): what happens when a run fails is not visible here.
 1742   std::cout << std::endl;
 
 1743   std::cout << 
"----------------------------------------------" << std::endl;
 
 1744   std::cout << 
"----------------------------------------------" << std::endl;
 
 1745   std::cout << 
"## Test :: Vector with Integer types" << std::endl;
 
 1746   std::cout << 
"----------------------------------------------" << std::endl;
 
 1747   std::cout << 
"----------------------------------------------" << std::endl;
 
 1748   std::cout << std::endl;
 
 1750   int retval = EXIT_SUCCESS;
 
 1752   std::cout << std::endl;
 
 1753   std::cout << 
"----------------------------------------------" << std::endl;
 
 1754   std::cout << std::endl;
 
 // Run the complete test driver with NumericT = int.
 1756     std::cout << 
"# Testing setup:" << std::endl;
 
 1757     std::cout << 
"  numeric: int" << std::endl;
 
 1758     retval = test<int>();
 
 1759     if ( retval == EXIT_SUCCESS )
 
 1760       std::cout << 
"# Test passed" << std::endl;
 
 1764   std::cout << std::endl;
 
 1765   std::cout << 
"----------------------------------------------" << std::endl;
 
 1766   std::cout << std::endl;
 
 // Run the complete test driver with NumericT = long.
 1768     std::cout << 
"# Testing setup:" << std::endl;
 
 1769     std::cout << 
"  numeric: long" << std::endl;
 
 1770     retval = test<long>();
 
 1771     if ( retval == EXIT_SUCCESS )
 
 1772       std::cout << 
"# Test passed" << std::endl;
 
 1776   std::cout << std::endl;
 
 1777   std::cout << 
"----------------------------------------------" << std::endl;
 
 1778   std::cout << std::endl;
 
 // Final banner printed after both numeric types have been processed.
 1780   std::cout << std::endl;
 
 1781   std::cout << 
"------- Test completed --------" << std::endl;
 
 1782   std::cout << std::endl;
 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector. 
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors. 
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations. 
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
int check(T1 const &t1, T2 const &t2)
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
int test(STLVectorType &std_v1, STLVectorType &std_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
Class for representing non-strided subvectors of a bigger vector x. 
Class for representing strided subvectors of a bigger vector x. 
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
Proxy classes for vectors. 
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied. 
Represents a vector consisting of 1 at a given index and zeros otherwise. 
Stub routines for the summation of elements in a vector, or all elements in either a row or column of...
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
A slice class that refers to a strided selection of indices, described by a start index, a stride, and a number of elements.
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)