40 template<
typename NumericT, 
typename VCLMatrixType>
 
   41 bool check_for_equality(std::vector<std::vector<NumericT> > 
const & std_A, VCLMatrixType 
const & vcl_A, 
double epsilon)
 
   43   std::vector<std::vector<NumericT> > vcl_A_cpu(vcl_A.size1(), std::vector<NumericT>(vcl_A.size2()));
 
   47   for (std::size_t i=0; i<std_A.size(); ++i)
 
   49     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
   51       if (std::fabs(std_A[i][j] - vcl_A_cpu[i][j]) > 0)
 
   53         if ( (std::abs(std_A[i][j] - vcl_A_cpu[i][j]) / 
std::max(std::fabs(std_A[i][j]), std::fabs(vcl_A_cpu[i][j])) > epsilon) || std::fabs(vcl_A_cpu[i][j] - vcl_A_cpu[i][j]) > 0 )
 
   55           std::cout << 
"Error at index (" << i << 
", " << j << 
"): " << std_A[i][j] << 
" vs " << vcl_A_cpu[i][j] << std::endl;
 
   56           std::cout << std::endl << 
"TEST failed!" << std::endl;
 
   63   std::cout << 
"PASSED!" << std::endl;
 
   70 template<
typename STLMatrixType,
 
   71           typename ViennaCLMatrixType1, 
typename ViennaCLMatrixType2, 
typename ViennaCLMatrixType3>
 
   73              STLMatrixType & std_A, STLMatrixType & std_B, STLMatrixType & std_C,
 
   74              ViennaCLMatrixType1 & vcl_A, ViennaCLMatrixType2 & vcl_B, ViennaCLMatrixType3 vcl_C)
 
   79   cpu_value_type alpha = cpu_value_type(3.1415);
 
   82   cpu_value_type beta = cpu_value_type(2.7182);
 
   89   std::cout << 
"Checking for zero_matrix initializer..." << std::endl;
 
   90   std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size()));
 
   95   std::cout << 
"Checking for scalar_matrix initializer..." << std::endl;
 
   96   std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size(), alpha));
 
  101   std_A = std::vector<std::vector<cpu_value_type> >(std_A.size(), std::vector<cpu_value_type>(std_A[0].size(), gpu_beta));
 
  113   std::cout << std::endl;
 
  121   std::cout << 
"Testing matrix assignment... ";
 
  128   if (std_A.size() == std_A[0].size())
 
  130     std::cout << 
"Testing matrix assignment (transposed)... ";
 
  131     for (std::size_t i=0; i<std_A.size(); ++i)
 
  132       for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  133         std_A[i][j] = std_B[j][i];
 
  149   std::cout << 
"Testing upper left copy to GPU... ";
 
  156   std::cout << 
"Testing lower right copy to GPU... ";
 
  166   std::cout << 
"Testing upper left copy to A... ";
 
  170   std::cout << 
"Testing lower right copy to C... ";
 
  181   std::cout << 
"Inplace add: ";
 
  182   for (std::size_t i=0; i<std_C.size(); ++i)
 
  183     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  184       std_C[i][j] += std_C[i][j];
 
  190   if (std_C.size() == std_C[0].size())
 
  192     std::cout << 
"Inplace add (transposed): ";
 
  193     STLMatrixType std_C2(std_C);
 
  194     for (std::size_t i=0; i<std_C.size(); ++i)
 
  195       for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  196         std_C[i][j] += std_C2[j][i];
 
  202     std::cout << 
"Inplace add (transposed, expression): ";
 
  204     for (std::size_t i=0; i<std_C.size(); ++i)
 
  205       for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  206         std_C[i][j] += std_C2[j][i] + std_C2[j][i];
 
  213   std::cout << 
"Scaled inplace add: ";
 
  214   for (std::size_t i=0; i<std_C.size(); ++i)
 
  215     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  216       std_C[i][j] += beta * std_A[i][j];
 
  217   vcl_C   += gpu_beta * vcl_A;
 
  222   std::cout << 
"Add: ";
 
  223   for (std::size_t i=0; i<std_C.size(); ++i)
 
  224     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  225       std_C[i][j] = std_A[i][j] + std_B[i][j];
 
  226   vcl_C   =   vcl_A +   vcl_B;
 
  231   std::cout << 
"Add with flipsign: ";
 
  232   for (std::size_t i=0; i<std_C.size(); ++i)
 
  233     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  234       std_C[i][j] = -std_A[i][j] + std_B[i][j];
 
  235   vcl_C   = -   vcl_A +   vcl_B;
 
  241   std::cout << 
"Scaled add (left): ";
 
  242   for (std::size_t i=0; i<std_C.size(); ++i)
 
  243     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  244       std_C[i][j] = cpu_value_type(
long(alpha)) * std_A[i][j] + std_B[i][j];
 
  245   vcl_C   = long(alpha) *   vcl_A +   vcl_B;
 
  250   for (std::size_t i=0; i<std_C.size(); ++i)
 
  251     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  252       std_C[i][j] = cpu_value_type(
float(alpha)) * std_A[i][j] + std_B[i][j];
 
  253   vcl_C   = float(alpha) *   vcl_A +   vcl_B;
 
  258   for (std::size_t i=0; i<std_C.size(); ++i)
 
  259     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  260       std_C[i][j] = cpu_value_type(
double(alpha)) * std_A[i][j] + std_B[i][j];
 
  261   vcl_C   = double(alpha) *   vcl_A +   vcl_B;
 
  266   std::cout << 
"Scaled add (left): ";
 
  267   vcl_C = gpu_alpha * vcl_A + vcl_B;
 
  272   std::cout << 
"Scaled add (right): ";
 
  273   for (std::size_t i=0; i<std_C.size(); ++i)
 
  274     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  275       std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
long(beta));
 
  276   vcl_C   =   vcl_A + vcl_B   * long(beta);
 
  281   for (std::size_t i=0; i<std_C.size(); ++i)
 
  282     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  283       std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
float(beta));
 
  284   vcl_C   =   vcl_A + vcl_B   * float(beta);
 
  289   for (std::size_t i=0; i<std_C.size(); ++i)
 
  290     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  291       std_C[i][j] = std_A[i][j] + std_B[i][j] * cpu_value_type(
double(beta));
 
  292   vcl_C   =   vcl_A + vcl_B   * double(beta);
 
  297   std::cout << 
"Scaled add (right): ";
 
  298   vcl_C = vcl_A + gpu_beta * vcl_B;
 
  302   std::cout << 
"Scaled add (right, with division): ";
 
  303   for (std::size_t i=0; i<std_C.size(); ++i)
 
  304     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  305       std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
long(beta));
 
  306   vcl_C   =   vcl_A + vcl_B   / long(beta);
 
  311   for (std::size_t i=0; i<std_C.size(); ++i)
 
  312     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  313       std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
float(beta));
 
  314   vcl_C   =   vcl_A + vcl_B   / float(beta);
 
  319   for (std::size_t i=0; i<std_C.size(); ++i)
 
  320     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  321       std_C[i][j] = std_A[i][j] + std_B[i][j] / cpu_value_type(
double(beta));
 
  322   vcl_C   =   vcl_A + vcl_B   / double(beta);
 
  328   std::cout << 
"Scaled add (both): ";
 
  329   for (std::size_t i=0; i<std_C.size(); ++i)
 
  330     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  331       std_C[i][j] = alpha * std_A[i][j] + beta * std_B[i][j];
 
  332   vcl_C   = alpha *   vcl_A + beta *   vcl_B;
 
  337   std::cout << 
"Scaled add (both): ";
 
  338   vcl_C = gpu_alpha * vcl_A + gpu_beta * vcl_B;
 
  347   std::cout << 
"Inplace sub: ";
 
  348   for (std::size_t i=0; i<std_C.size(); ++i)
 
  349     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  350       std_C[i][j] -= std_B[i][j];
 
  356   if (std_C.size() == std_C[0].size())
 
  358     std::cout << 
"Inplace sub (transposed): ";
 
  359     STLMatrixType std_C2(std_C);
 
  360     for (std::size_t i=0; i<std_C.size(); ++i)
 
  361       for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  362         std_C[i][j] -= cpu_value_type(2) * std_C2[j][i];
 
  368     std::cout << 
"Inplace sub (transposed, expression): ";
 
  370     for (std::size_t i=0; i<std_C.size(); ++i)
 
  371       for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  372         std_C[i][j] -= std_C2[j][i] + std_C2[j][i];
 
  379   std::cout << 
"Scaled Inplace sub: ";
 
  380   for (std::size_t i=0; i<std_C.size(); ++i)
 
  381     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  382       std_C[i][j] -= alpha * std_B[i][j];
 
  383   vcl_C -= alpha * vcl_B;
 
  391   std::cout << 
"Sub: ";
 
  392   for (std::size_t i=0; i<std_C.size(); ++i)
 
  393     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  394       std_C[i][j] = std_A[i][j] - std_B[i][j];
 
  395   vcl_C = vcl_A - vcl_B;
 
  400   std::cout << 
"Scaled sub (left): ";
 
  401   for (std::size_t i=0; i<std_C.size(); ++i)
 
  402     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  403       std_B[i][j] = alpha * std_A[i][j] - std_C[i][j];
 
  404   vcl_B   = alpha *   vcl_A - vcl_C;
 
  409   std::cout << 
"Scaled sub (left): ";
 
  410   vcl_B = gpu_alpha * vcl_A - vcl_C;
 
  415   std::cout << 
"Scaled sub (right): ";
 
  416   for (std::size_t i=0; i<std_C.size(); ++i)
 
  417     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  418       std_B[i][j] = std_A[i][j] - beta * std_C[i][j];
 
  419   vcl_B   =   vcl_A - vcl_C * beta;
 
  424   std::cout << 
"Scaled sub (right): ";
 
  425   vcl_B = vcl_A - vcl_C * gpu_beta;
 
  430   std::cout << 
"Scaled sub (both): ";
 
  431   for (std::size_t i=0; i<std_C.size(); ++i)
 
  432     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  433       std_B[i][j] = alpha * std_A[i][j] - beta * std_C[i][j];
 
  434   vcl_B = alpha * vcl_A - vcl_C * beta;
 
  439   std::cout << 
"Scaled sub (both): ";
 
  440   vcl_B = gpu_alpha * vcl_A - vcl_C * gpu_beta;
 
  445   std::cout << 
"Unary operator-: ";
 
  446   for (std::size_t i=0; i<std_C.size(); ++i)
 
  447     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  448       std_C[i][j] = - std_A[i][j];
 
  461   std::cout << 
"Multiplication with CPU scalar: ";
 
  462   for (std::size_t i=0; i<std_A.size(); ++i)
 
  463     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  464       std_A[i][j] *= cpu_value_type(
long(alpha));
 
  465   vcl_A   *= long(alpha);
 
  470   for (std::size_t i=0; i<std_A.size(); ++i)
 
  471     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  472       std_A[i][j] *= cpu_value_type(
float(alpha));
 
  473   vcl_A   *= float(alpha);
 
  478   for (std::size_t i=0; i<std_A.size(); ++i)
 
  479     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  480       std_A[i][j] *= cpu_value_type(
double(alpha));
 
  481   vcl_A   *= double(alpha);
 
  486   std::cout << 
"Multiplication with GPU scalar: ";
 
  487   for (std::size_t i=0; i<std_A.size(); ++i)
 
  488     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  496   std::cout << 
"Division with CPU scalar: ";
 
  497   for (std::size_t i=0; i<std_A.size(); ++i)
 
  498     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  499       std_A[i][j] /= cpu_value_type(
long(alpha));
 
  500   vcl_A   /= long(alpha);
 
  505   for (std::size_t i=0; i<std_A.size(); ++i)
 
  506     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  507       std_A[i][j] /= cpu_value_type(
float(alpha));
 
  508   vcl_A   /= float(alpha);
 
  513   for (std::size_t i=0; i<std_A.size(); ++i)
 
  514     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  515       std_A[i][j] /= cpu_value_type(
double(alpha));
 
  516   vcl_A   /= double(alpha);
 
  521   std::cout << 
"Division with GPU scalar: ";
 
  522   for (std::size_t i=0; i<std_A.size(); ++i)
 
  523     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  532   std::cout << 
"Testing elementwise multiplication..." << std::endl;
 
  533   std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
 
  534   for (std::size_t i=0; i<std_A.size(); ++i)
 
  535     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  536       std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
 
  540   for (std::size_t i=0; i<std_A.size(); ++i)
 
  541     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  542       std_A[i][j] = std_A[i][j] * std_B[i][j];
 
  548   for (std::size_t i=0; i<std_A.size(); ++i)
 
  549     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  550       std_A[i][j] += std_A[i][j] * std_B[i][j];
 
  556   for (std::size_t i=0; i<std_A.size(); ++i)
 
  557     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  558       std_A[i][j] -= std_A[i][j] * std_B[i][j];
 
  565   for (std::size_t i=0; i<std_A.size(); ++i)
 
  566     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  567       std_A[i][j] = (std_A[i][j] + std_B[i][j]) * std_B[i][j];
 
  573   for (std::size_t i=0; i<std_A.size(); ++i)
 
  574     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  575       std_A[i][j] += (std_A[i][j] + std_B[i][j]) * std_B[i][j];
 
  581   for (std::size_t i=0; i<std_A.size(); ++i)
 
  582     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  583       std_A[i][j] -= (std_A[i][j] + std_B[i][j]) * std_B[i][j];
 
  590   for (std::size_t i=0; i<std_A.size(); ++i)
 
  591     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  592       std_A[i][j] = std_A[i][j] * (std_B[i][j] + std_A[i][j]);
 
  598   for (std::size_t i=0; i<std_A.size(); ++i)
 
  599     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  600       std_A[i][j] += std_A[i][j] * (std_B[i][j] + std_A[i][j]);
 
  606   for (std::size_t i=0; i<std_A.size(); ++i)
 
  607     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  608       std_A[i][j] -= std_A[i][j] * (std_B[i][j] + std_A[i][j]);
 
  615   for (std::size_t i=0; i<std_A.size(); ++i)
 
  616     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  617       std_A[i][j] = (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
 
  623   for (std::size_t i=0; i<std_A.size(); ++i)
 
  624     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  625       std_A[i][j] += (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
 
  631   for (std::size_t i=0; i<std_A.size(); ++i)
 
  632     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  633       std_A[i][j] -= (std_A[i][j] + std_B[i][j]) * (std_B[i][j] + std_A[i][j]);
 
  640   std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
 
  641   for (std::size_t i=0; i<std_A.size(); ++i)
 
  642     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  643       std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
 
  647   for (std::size_t i=0; i<std_A.size(); ++i)
 
  648     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  649       std_A[i][j] = std_A[i][j] / std_B[i][j];
 
  655   for (std::size_t i=0; i<std_A.size(); ++i)
 
  656     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  657       std_A[i][j] += std_A[i][j] / std_B[i][j];
 
  663   for (std::size_t i=0; i<std_A.size(); ++i)
 
  664     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  665       std_A[i][j] -= std_A[i][j] / std_B[i][j];
 
  672   for (std::size_t i=0; i<std_A.size(); ++i)
 
  673     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  674       std_A[i][j] = (std_A[i][j] + std_B[i][j]) / std_B[i][j];
 
  680   for (std::size_t i=0; i<std_A.size(); ++i)
 
  681     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  682       std_A[i][j] += (std_A[i][j] + std_B[i][j]) / std_B[i][j];
 
  688   for (std::size_t i=0; i<std_A.size(); ++i)
 
  689     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  690       std_A[i][j] -= (std_A[i][j] + std_B[i][j]) / std_B[i][j];
 
  697   for (std::size_t i=0; i<std_A.size(); ++i)
 
  698     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  699       std_A[i][j] = std_A[i][j] / (std_B[i][j] + std_A[i][j]);
 
  705   for (std::size_t i=0; i<std_A.size(); ++i)
 
  706     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  707       std_A[i][j] += std_A[i][j] / (std_B[i][j] + std_A[i][j]);
 
  713   for (std::size_t i=0; i<std_A.size(); ++i)
 
  714     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  715       std_A[i][j] -= std_A[i][j] / (std_B[i][j] + std_A[i][j]);
 
  722   for (std::size_t i=0; i<std_A.size(); ++i)
 
  723     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  724       std_A[i][j] = (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
 
  730   for (std::size_t i=0; i<std_A.size(); ++i)
 
  731     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  732       std_A[i][j] += (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
 
  738   for (std::size_t i=0; i<std_A.size(); ++i)
 
  739     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  740       std_A[i][j] -= (std_A[i][j] + std_B[i][j]) / (std_B[i][j] + std_A[i][j]);
 
  747   std::cout << 
"Testing unary element_pow()..." << std::endl;
 
  749   std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142)));
 
  750   for (std::size_t i=0; i<std_A.size(); ++i)
 
  751     for (std::size_t j=0; j<std_A[i].size(); ++j)
 
  752       std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j];
 
  756   for (std::size_t i=0; i<std_C.size(); ++i)
 
  757     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  758       std_C[i][j] = std::pow(std_A[i][j], std_B[i][j]);
 
  759   vcl_C = viennacl::linalg::element_pow(vcl_A, vcl_B);
 
  764   for (std::size_t i=0; i<std_C.size(); ++i)
 
  765     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  766       std_C[i][j] += std::pow(std_A[i][j], std_B[i][j]);
 
  767   vcl_C += viennacl::linalg::element_pow(vcl_A, vcl_B);
 
  772   for (std::size_t i=0; i<std_C.size(); ++i)
 
  773     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  774       std_C[i][j] -= std::pow(std_A[i][j], std_B[i][j]);
 
  775   vcl_C -= viennacl::linalg::element_pow(vcl_A, vcl_B);
 
  781   for (std::size_t i=0; i<std_C.size(); ++i)
 
  782     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  783       std_C[i][j] = std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
 
  784   vcl_C = viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
 
  789   for (std::size_t i=0; i<std_C.size(); ++i)
 
  790     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  791       std_C[i][j] += std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
 
  792   vcl_C += viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
 
  797   for (std::size_t i=0; i<std_C.size(); ++i)
 
  798     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  799       std_C[i][j] -= std::pow(std_A[i][j] + std_B[i][j], std_B[i][j]);
 
  800   vcl_C -= viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B);
 
  806   for (std::size_t i=0; i<std_C.size(); ++i)
 
  807     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  808       std_C[i][j] = std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
 
  809   vcl_C = viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
 
  814   for (std::size_t i=0; i<std_C.size(); ++i)
 
  815     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  816       std_C[i][j] += std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
 
  817   vcl_C += viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
 
  822   for (std::size_t i=0; i<std_C.size(); ++i)
 
  823     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  824       std_C[i][j] -= std::pow(std_A[i][j], std_B[i][j] + std_A[i][j]);
 
  825   vcl_C -= viennacl::linalg::element_pow(vcl_A, vcl_B + vcl_A);
 
  831   for (std::size_t i=0; i<std_C.size(); ++i)
 
  832     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  833       std_C[i][j] = std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
 
  834   vcl_C = viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
 
  839   for (std::size_t i=0; i<std_C.size(); ++i)
 
  840     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  841       std_C[i][j] += std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
 
  842   vcl_C += viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
 
  847   for (std::size_t i=0; i<std_C.size(); ++i)
 
  848     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  849       std_C[i][j] -= std::pow(std_A[i][j] + std_B[i][j], std_B[i][j] + std_A[i][j]);
 
  850   vcl_C -= viennacl::linalg::element_pow(vcl_A + vcl_B, vcl_B + vcl_A);
 
  856   std::cout << 
"Testing unary elementwise operations..." << std::endl;
 
  858 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \ 
  859   std_B = std::vector<std::vector<cpu_value_type> >(std_B.size(), std::vector<cpu_value_type>(std_B[0].size(), cpu_value_type(1.4142))); \ 
  860   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  861     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  862       std_A[i][j] = cpu_value_type(3.1415) * std_B[i][j]; \ 
  863   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  864     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  865       std_C[i][j] = cpu_value_type(2.7172) * std_A[i][j]; \ 
  866   viennacl::copy(std_A, vcl_A); \ 
  867   viennacl::copy(std_B, vcl_B); \ 
  868   viennacl::copy(std_C, vcl_C); \ 
  869   viennacl::copy(std_B, vcl_B); \ 
  871   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  872     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  873       std_C[i][j] = std::FUNCNAME(std_A[i][j]); \ 
  874   vcl_C = viennacl::linalg::element_##FUNCNAME(vcl_A); \ 
  876   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  878     std::cout << "Failure at C = " << #FUNCNAME << "(A)" << std::endl; \ 
  879     return EXIT_FAILURE; \ 
  882   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  883     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  884       std_C[i][j] = std::FUNCNAME(std_A[i][j] + std_B[i][j]); \ 
  885   vcl_C = viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \ 
  887   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  889     std::cout << "Failure at C = " << #FUNCNAME << "(A + B)" << std::endl; \ 
  890     return EXIT_FAILURE; \ 
  893   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  894     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  895       std_C[i][j] += std::FUNCNAME(std_A[i][j]); \ 
  896   vcl_C += viennacl::linalg::element_##FUNCNAME(vcl_A); \ 
  898   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  900     std::cout << "Failure at C += " << #FUNCNAME << "(A)" << std::endl; \ 
  901     return EXIT_FAILURE; \ 
  904   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  905     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  906       std_C[i][j] += std::FUNCNAME(std_A[i][j] + std_B[i][j]); \ 
  907   vcl_C += viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \ 
  909   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  911     std::cout << "Failure at C += " << #FUNCNAME << "(A + B)" << std::endl; \ 
  912     return EXIT_FAILURE; \ 
  915   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  916     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  917       std_C[i][j] -= std::FUNCNAME(std_A[i][j]); \ 
  918   vcl_C -= viennacl::linalg::element_##FUNCNAME(vcl_A); \ 
  920   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  922     std::cout << "Failure at C -= " << #FUNCNAME << "(A)" << std::endl; \ 
  923     return EXIT_FAILURE; \ 
  926   for (std::size_t i=0; i<std_C.size(); ++i) \ 
  927     for (std::size_t j=0; j<std_C[i].size(); ++j) \ 
  928       std_C[i][j] -= std::FUNCNAME(std_A[i][j] + std_B[i][j]); \ 
  929   vcl_C -= viennacl::linalg::element_##FUNCNAME(vcl_A + vcl_B); \ 
  931   if (!check_for_equality(std_C, vcl_C, epsilon)) \ 
  933     std::cout << "Failure at C -= " << #FUNCNAME << "(A + B)" << std::endl; \ 
  934     return EXIT_FAILURE; \ 
  952   std::cout << 
"Complicated expressions: ";
 
  956   for (std::size_t i=0; i<std_C.size(); ++i)
 
  957     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  958       std_B[i][j] +=     alpha * (- std_A[i][j] - beta * std_C[i][j] + std_A[i][j]);
 
  959   vcl_B += gpu_alpha * (-   vcl_A - vcl_C * beta   +   vcl_A);
 
  964   for (std::size_t i=0; i<std_C.size(); ++i)
 
  965     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  966       std_B[i][j] += (- std_A[i][j] - beta * std_C[i][j] + std_A[i][j] * beta) / gpu_alpha;
 
  967   vcl_B += (-   vcl_A - vcl_C * beta + gpu_beta * vcl_A) / gpu_alpha;
 
  973   for (std::size_t i=0; i<std_C.size(); ++i)
 
  974     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  975       std_B[i][j] -=     alpha * (- std_A[i][j] - beta * std_C[i][j] - std_A[i][j]);
 
  976   vcl_B -= gpu_alpha * (-   vcl_A - vcl_C * beta - vcl_A);
 
  981   for (std::size_t i=0; i<std_C.size(); ++i)
 
  982     for (std::size_t j=0; j<std_C[i].size(); ++j)
 
  983       std_B[i][j] -= (- std_A[i][j] - beta * std_C[i][j] - std_A[i][j] * beta) / alpha;
 
  984   vcl_B   -= (-   vcl_A - vcl_C * beta - gpu_beta * vcl_A) / gpu_alpha;
 
  989   std::cout << std::endl;
 
  990   std::cout << 
"----------------------------------------------" << std::endl;
 
  991   std::cout << std::endl;
 
 1000 template<
typename T, 
typename ScalarType>
 
 1006     std::size_t dim_rows = 131;
 
 1007     std::size_t dim_cols = 33;
 
 1012     std::vector<std::vector<ScalarType> > std_A(dim_rows, std::vector<ScalarType>(dim_cols));
 
 1013     std::vector<std::vector<ScalarType> > std_B(dim_rows, std::vector<ScalarType>(dim_cols));
 
 1014     std::vector<std::vector<ScalarType> > std_C(dim_rows, std::vector<ScalarType>(dim_cols));
 
 1016     for (std::size_t i=0; i<std_A.size(); ++i)
 
 1017       for (std::size_t j=0; j<std_A[i].size(); ++j)
 
 1019         std_A[i][j] = 
ScalarType((i+2) + (j+1)*(i+2));
 
 1020         std_B[i][j] = 
ScalarType((j+2) + (j+1)*(j+2));
 
 1021         std_C[i][j] = 
ScalarType((i+1) + (i+1)*(i+2));
 
 1024     std::vector<std::vector<ScalarType> > std_A_large(4 * dim_rows, std::vector<ScalarType>(4 * dim_cols));
 
 1025     for (std::size_t i=0; i<std_A_large.size(); ++i)
 
 1026       for (std::size_t j=0; j<std_A_large[i].size(); ++j)
 
 1030     VCLMatrixType vcl_A_full(4 * dim_rows, 4 * dim_cols);
 
 1031     VCLMatrixType vcl_B_full(4 * dim_rows, 4 * dim_cols);
 
 1032     VCLMatrixType vcl_C_full(4 * dim_rows, 4 * dim_cols);
 
 1041     VCLMatrixType vcl_A(dim_rows, dim_cols);
 
 1055     VCLMatrixType vcl_B(dim_rows, dim_cols);
 
 1069     VCLMatrixType vcl_C(dim_rows, dim_cols);
 
 1092     std::cout << std::endl;
 
 1093     std::cout << 
"//" << std::endl;
 
 1094     std::cout << 
"////////// Test: Copy CTOR //////////" << std::endl;
 
 1095     std::cout << 
"//" << std::endl;
 
 1098       std::cout << 
"Testing matrix created from range... ";
 
 1099       VCLMatrixType vcl_temp = vcl_range_A;
 
 1101         std::cout << 
"PASSED!" << std::endl;
 
 1104         std::cout << 
"vcl_temp: " << vcl_temp << std::endl;
 
 1105         std::cout << 
"vcl_range_A: " << vcl_range_A << std::endl;
 
 1106         std::cout << 
"vcl_A: " << vcl_A << std::endl;
 
 1107         std::cout << std::endl << 
"TEST failed!" << std::endl;
 
 1108         return EXIT_FAILURE;
 
 1111       std::cout << 
"Testing matrix created from slice... ";
 
 1112       VCLMatrixType vcl_temp2 = vcl_range_B;
 
 1114         std::cout << 
"PASSED!" << std::endl;
 
 1117         std::cout << std::endl << 
"TEST failed!" << std::endl;
 
 1118         return EXIT_FAILURE;
 
 1122     std::cout << 
"//" << std::endl;
 
 1123     std::cout << 
"////////// Test: Initializer for matrix type //////////" << std::endl;
 
 1124     std::cout << 
"//" << std::endl;
 
 1127       std::vector<std::vector<ScalarType> > std_dummy1(std_A.size(), std::vector<ScalarType>(std_A.size()));
 
 1128       for (std::size_t i=0; i<std_A.size(); ++i) std_dummy1[i][i] = 
ScalarType(1);
 
 1129       std::vector<std::vector<ScalarType> > std_dummy2(std_A.size(), std::vector<ScalarType>(std_A.size(), 
ScalarType(3)));
 
 1130       std::vector<std::vector<ScalarType> > std_dummy3(std_A.size(), std::vector<ScalarType>(std_A.size()));
 
 1136       std::cout << 
"Testing initializer CTOR... ";
 
 1141         std::cout << 
"PASSED!" << std::endl;
 
 1144         std::cout << std::endl << 
"TEST failed!" << std::endl;
 
 1145         return EXIT_FAILURE;
 
 1148       std_dummy1 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size()));
 
 1149       std_dummy2 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size()));
 
 1150       for (std::size_t i=0; i<std_A.size(); ++i) std_dummy2[i][i] = 
ScalarType(1);
 
 1151       std_dummy3 = std::vector<std::vector<ScalarType> >(std_A.size(), std::vector<ScalarType>(std_A.size(), 3));
 
 1157       std::cout << 
"Testing initializer assignment... ";
 
 1162         std::cout << 
"PASSED!" << std::endl;
 
 1165         std::cout << std::endl << 
"TEST failed!" << std::endl;
 
 1166         return EXIT_FAILURE;
 
 1170     std::cout << 
"//" << std::endl;
 
 1171     std::cout << 
"////////// Test: Norms //////////" << std::endl;
 
 1172     std::cout << 
"//" << std::endl;
 
 1191     for (std::size_t i=0; i<std_C.size(); ++i)
 
 1192       for (std::size_t j=0; j<std_C[i].size(); ++j)
 
 1193         std_norm_frobenius += std_C[i][j] * std_C[i][j];
 
 1194     std_norm_frobenius = std::sqrt(std_norm_frobenius);
 
 1196     if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
 
 1198       std::cerr << 
"Failure at norm_frobenius()" << std::endl;
 
 1199       return EXIT_FAILURE;
 
 1204     if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
 
 1206       std::cerr << 
"Failure at norm_frobenius()" << std::endl;
 
 1207       return EXIT_FAILURE;
 
 1211     if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
 
 1213       std::cerr << 
"Failure at norm_frobenius() with range" << std::endl;
 
 1214       return EXIT_FAILURE;
 
 1218     if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
 
 1220       std::cerr << 
"Failure at norm_frobenius() with range" << std::endl;
 
 1221       return EXIT_FAILURE;
 
 1225     if ( std::fabs(std_norm_frobenius - vcl_norm_frobenius) / std_norm_frobenius > epsilon)
 
 1227       std::cerr << 
"Failure at norm_frobenius() with slice" << std::endl;
 
 1228       return EXIT_FAILURE;
 
 1232     if ( std::fabs(device_std_norm_frobenius - device_vcl_norm_frobenius) / device_std_norm_frobenius > epsilon)
 
 1234       std::cerr << 
"Failure at norm_frobenius() with slice" << std::endl;
 
 1235       return EXIT_FAILURE;
 
 1238     std::cout << 
"PASSED!" << std::endl;
 
 1245     std::cout << 
"//" << std::endl;
 
 1246     std::cout << 
"////////// Test: Operations //////////" << std::endl;
 
 1247     std::cout << 
"//" << std::endl;
 
 1251     std::cout << 
"Testing A=matrix, B=matrix, C=matrix ..." << std::endl;
 
 1256                  std_A, std_B, std_C,
 
 1257                  vcl_A, vcl_B, vcl_C) != EXIT_SUCCESS)
 
 1259       return EXIT_FAILURE;
 
 1262     std::cout << 
"Testing A=matrix, B=matrix, C=range ..." << std::endl;
 
 1267                  std_A, std_B, std_C,
 
 1268                  vcl_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
 
 1270       return EXIT_FAILURE;
 
 1273     std::cout << 
"Testing A=matrix, B=matrix, C=slice ..." << std::endl;
 
 1278                  std_A, std_B, std_C,
 
 1279                  vcl_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1281       return EXIT_FAILURE;
 
 1284     std::cout << 
"Testing A=matrix, B=range, C=matrix ..." << std::endl;
 
 1289                  std_A, std_B, std_C,
 
 1290                  vcl_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
 
 1292       return EXIT_FAILURE;
 
 1295     std::cout << 
"Testing A=matrix, B=range, C=range ..." << std::endl;
 
 1300                  std_A, std_B, std_C,
 
 1301                  vcl_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
 
 1303       return EXIT_FAILURE;
 
 1306     std::cout << 
"Testing A=matrix, B=range, C=slice ..." << std::endl;
 
 1311                  std_A, std_B, std_C,
 
 1312                  vcl_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1314       return EXIT_FAILURE;
 
 1318     std::cout << 
"Testing A=matrix, B=slice, C=matrix ..." << std::endl;
 
 1323                  std_A, std_B, std_C,
 
 1324                  vcl_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
 
 1326       return EXIT_FAILURE;
 
 1329     std::cout << 
"Testing A=matrix, B=slice, C=range ..." << std::endl;
 
 1334                  std_A, std_B, std_C,
 
 1335                  vcl_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
 
 1337       return EXIT_FAILURE;
 
 1340     std::cout << 
"Testing A=matrix, B=slice, C=slice ..." << std::endl;
 
 1345                  std_A, std_B, std_C,
 
 1346                  vcl_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1348       return EXIT_FAILURE;
 
 1354     std::cout << 
"Testing A=range, B=matrix, C=matrix ..." << std::endl;
 
 1359                  std_A, std_B, std_C,
 
 1360                  vcl_range_A, vcl_B, vcl_C) != EXIT_SUCCESS)
 
 1362       return EXIT_FAILURE;
 
 1365     std::cout << 
"Testing A=range, B=matrix, C=range ..." << std::endl;
 
 1370                  std_A, std_B, std_C,
 
 1371                  vcl_range_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
 
 1373       return EXIT_FAILURE;
 
 1376     std::cout << 
"Testing A=range, B=matrix, C=slice ..." << std::endl;
 
 1381                  std_A, std_B, std_C,
 
 1382                  vcl_range_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1384       return EXIT_FAILURE;
 
 1389     std::cout << 
"Testing A=range, B=range, C=matrix ..." << std::endl;
 
 1394                  std_A, std_B, std_C,
 
 1395                  vcl_range_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
 
 1397       return EXIT_FAILURE;
 
 1400     std::cout << 
"Testing A=range, B=range, C=range ..." << std::endl;
 
 1405                  std_A, std_B, std_C,
 
 1406                  vcl_range_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
 
 1408       return EXIT_FAILURE;
 
 1411     std::cout << 
"Testing A=range, B=range, C=slice ..." << std::endl;
 
 1416                  std_A, std_B, std_C,
 
 1417                  vcl_range_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1419       return EXIT_FAILURE;
 
 1424     std::cout << 
"Testing A=range, B=slice, C=matrix ..." << std::endl;
 
 1429                  std_A, std_B, std_C,
 
 1430                  vcl_range_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
 
 1432       return EXIT_FAILURE;
 
 1435     std::cout << 
"Testing A=range, B=slice, C=range ..." << std::endl;
 
 1440                  std_A, std_B, std_C,
 
 1441                  vcl_range_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
 
 1443       return EXIT_FAILURE;
 
 1446     std::cout << 
"Testing A=range, B=slice, C=slice ..." << std::endl;
 
 1451                  std_A, std_B, std_C,
 
 1452                  vcl_range_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1454       return EXIT_FAILURE;
 
 1459     std::cout << 
"Testing A=slice, B=matrix, C=matrix ..." << std::endl;
 
 1464                  std_A, std_B, std_C,
 
 1465                  vcl_slice_A, vcl_B, vcl_C) != EXIT_SUCCESS)
 
 1467       return EXIT_FAILURE;
 
 1470     std::cout << 
"Testing A=slice, B=matrix, C=range ..." << std::endl;
 
 1475                  std_A, std_B, std_C,
 
 1476                  vcl_slice_A, vcl_B, vcl_range_C) != EXIT_SUCCESS)
 
 1478       return EXIT_FAILURE;
 
 1481     std::cout << 
"Testing A=slice, B=matrix, C=slice ..." << std::endl;
 
 1486                  std_A, std_B, std_C,
 
 1487                  vcl_slice_A, vcl_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1489       return EXIT_FAILURE;
 
 1494     std::cout << 
"Testing A=slice, B=range, C=matrix ..." << std::endl;
 
 1499                  std_A, std_B, std_C,
 
 1500                  vcl_slice_A, vcl_range_B, vcl_C) != EXIT_SUCCESS)
 
 1502       return EXIT_FAILURE;
 
 1505     std::cout << 
"Testing A=slice, B=range, C=range ..." << std::endl;
 
 1510                  std_A, std_B, std_C,
 
 1511                  vcl_slice_A, vcl_range_B, vcl_range_C) != EXIT_SUCCESS)
 
 1513       return EXIT_FAILURE;
 
 1516     std::cout << 
"Testing A=slice, B=range, C=slice ..." << std::endl;
 
 1521                  std_A, std_B, std_C,
 
 1522                  vcl_slice_A, vcl_range_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1524       return EXIT_FAILURE;
 
 1529     std::cout << 
"Testing A=slice, B=slice, C=matrix ..." << std::endl;
 
 1534                  std_A, std_B, std_C,
 
 1535                  vcl_slice_A, vcl_slice_B, vcl_C) != EXIT_SUCCESS)
 
 1537       return EXIT_FAILURE;
 
 1540     std::cout << 
"Testing A=slice, B=slice, C=range ..." << std::endl;
 
 1545                  std_A, std_B, std_C,
 
 1546                  vcl_slice_A, vcl_slice_B, vcl_range_C) != EXIT_SUCCESS)
 
 1548       return EXIT_FAILURE;
 
 1551     std::cout << 
"Testing A=slice, B=slice, C=slice ..." << std::endl;
 
 1556                  std_A, std_B, std_C,
 
 1557                  vcl_slice_A, vcl_slice_B, vcl_slice_C) != EXIT_SUCCESS)
 
 1559       return EXIT_FAILURE;
 
 1563     return EXIT_SUCCESS;
 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix. 
Class for representing strided submatrices of a bigger matrix A. 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class. 
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
T max(const T &lhs, const T &rhs)
Maximum. 
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
Generic interface for the Frobenius norm. 
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only. 
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors. 
Proxy classes for matrices. 
bool check_for_equality(std::vector< std::vector< NumericT > > const &std_A, VCLMatrixType const &vcl_A, double epsilon)
scalar_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_norm_frobenius > norm_frobenius(const matrix_base< NumericT > &A)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
Class for representing non-strided submatrices of a bigger matrix A. 
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
Implementation of the ViennaCL scalar class. 
int run_test(double epsilon, STLMatrixType &std_A, STLMatrixType &std_B, STLMatrixType &std_C, ViennaCLMatrixType1 &vcl_A, ViennaCLMatrixType2 &vcl_B, ViennaCLMatrixType3 vcl_C)
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...