#ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_

#ifndef VIENNACL_OPENMP_MATRIX_MIN_SIZE
  #define VIENNACL_OPENMP_MATRIX_MIN_SIZE  5000
#endif
template<typename DestNumericT, typename SrcNumericT>
void convert(matrix_base<DestNumericT> & mat1, matrix_base<SrcNumericT> const & mat2)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  DestNumericT      * data_A = detail::extract_raw_pointer<DestNumericT>(mat1);
  SrcNumericT const * data_B = detail::extract_raw_pointer<SrcNumericT>(mat2);

  // ... (start/stride/size bookkeeping and wrapper setup elided)

  if (mat1.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
  }
}
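
// Usage sketch (not part of the original file; assumes the generic dispatcher
// viennacl::linalg::convert() and a host execution context):
//
//   viennacl::matrix<double> Md(16, 16);
//   viennacl::matrix<float>  Mf(16, 16);
//   viennacl::linalg::convert(Mf, Md);   // Mf(i,j) = static_cast<float>(Md(i,j))
//
// Note how the loop order follows the storage layout: rows outermost for
// row-major matrices, columns outermost for column-major ones.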
 
template<typename NumericT, typename SizeT, typename DistanceT>
void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,
                                   const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
           matrix_base<NumericT> & temp_trans)
{
  typedef NumericT value_type;

  const value_type * data_A = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type * data_B       = detail::extract_raw_pointer<value_type>(temp_trans);

  // ... (start/stride/size bookkeeping and the blocking constant sub_mat_size elided)

  vcl_size_t row_count = A_size1 / sub_mat_size;
  vcl_size_t col_count = A_size2 / sub_mat_size;

  vcl_size_t row_count_remainder = A_size1 % sub_mat_size;
  vcl_size_t col_count_remainder = A_size2 % sub_mat_size;

  if (proxy.lhs().row_major())
  {
 
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i)   // transpose full blocks
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;
      detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                                                                               , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                                                                               , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , row_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                                                                               , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                                                                               , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(j, k) = wrapper_A(k, j);
    }

    // remaining columns on the right (wrappers as above, shifted by col_count * sub_mat_size):
    //   wrapper_A(data_A, ..., A_start2 + A_inc2 * (col_count * sub_mat_size), A_inc1, A_inc2, A_internal_size1, A_internal_size2);
    //   wrapper_B(data_B, B_start1 + B_inc1 * (col_count * sub_mat_size), ..., B_inc2, B_internal_size1, B_internal_size2);
    for (vcl_size_t j = 0; j < col_count_remainder; ++j)
      for (vcl_size_t k = 0; k < A_size1 - row_count_remainder; ++k)
        wrapper_B(j, k) = wrapper_A(k, j);

    // remaining rows at the bottom (wrappers shifted by row_count * sub_mat_size):
    //   wrapper_A(data_A, A_start1 + A_inc1 * (row_count * sub_mat_size), ..., A_inc2, A_internal_size1, A_internal_size2);
    //   wrapper_B(data_B, ..., B_start2 + B_inc2 * (row_count * sub_mat_size), B_inc1, B_inc2, B_internal_size1, B_internal_size2);
    for (vcl_size_t j = 0; j < row_count_remainder; ++j)
      for (vcl_size_t k = 0; k < A_size2 - col_count_remainder; ++k)
        wrapper_B(k, j) = wrapper_A(j, k);
  }
 
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i = 0; i < static_cast<long>(row_count*col_count); ++i)   // transpose full blocks
    {
      vcl_size_t row = vcl_size_t(i) / col_count;
      vcl_size_t col = vcl_size_t(i) % col_count;
      detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1 + A_inc1 * (row * sub_mat_size)
                                                                                  , A_start2 + A_inc2 * (col * sub_mat_size), A_inc1
                                                                                  , A_inc2, A_internal_size1, A_internal_size2);
      detail::matrix_array_wrapper<value_type      , column_major, false> wrapper_B(data_B, B_start1 + B_inc1 * (col * sub_mat_size)
                                                                                  , B_start2 + B_inc2 * (row * sub_mat_size), B_inc1
                                                                                  , B_inc2, B_internal_size1, B_internal_size2);
      for (vcl_size_t j = 0; j < sub_mat_size; ++j)
        for (vcl_size_t k = 0; k < sub_mat_size; ++k)
          wrapper_B(k, j) = wrapper_A(j, k);
    }

    // remainder handling mirrors the row-major branch:
    for (vcl_size_t j = 0; j < col_count_remainder; ++j)
      for (vcl_size_t k = 0; k < A_size1 - row_count_remainder; ++k)
        wrapper_B(j, k) = wrapper_A(k, j);

    for (vcl_size_t j = 0; j < row_count_remainder; ++j)
      for (vcl_size_t k = 0; k < A_size2 - col_count_remainder; ++k)
        wrapper_B(k, j) = wrapper_A(j, k);
  }
}
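
// Usage sketch (illustrative): this blocked kernel backs out-of-place transposition,
// e.g.
//
//   viennacl::matrix<float> A(100, 60), B(60, 100);
//   B = viennacl::trans(A);
//
// Transposing sub_mat_size x sub_mat_size tiles keeps both the reads from A and
// the writes to B within cache-friendly ranges; the right and bottom fringes that
// do not fill a complete tile are handled by the separate remainder loops.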
 
template<typename NumericT, typename ScalarT1>
void am(matrix_base<NumericT> & mat1,
        matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;
 
  if (mat1.row_major())
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
  else
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
}
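
// Usage sketch (illustrative): am() realizes mat1 = +/- alpha * mat2, where the
// flags encode how alpha enters. High-level statements such as
//
//   A = NumericT(2) * B;    // reciprocal_alpha == false, flip_sign_alpha == false
//   A = B / NumericT(4);    // reciprocal_alpha == true
//   A = -B;                 // flip_sign_alpha == true
//
// are decomposed by the expression-template layer into exactly one of the four
// loop nests above (assumed mapping of the scheduler).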
 
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm(matrix_base<NumericT> & mat1,
          matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
          matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  // ... (layout assert as in am() elided)

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;
 
  if (mat1.row_major())
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
 
  else
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
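
// Usage sketch (illustrative): ambm() covers mat1 = alpha * mat2 + beta * mat3
// with independent reciprocal/sign flags per scalar, so e.g.
//
//   A = NumericT(2) * B + C / NumericT(3);   // reciprocal_beta == true
//
// selects the (!reciprocal_alpha && reciprocal_beta) loop nest above. The four-way
// branch hoists the flag tests out of the hot loops instead of testing per entry.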
 
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm_m(matrix_base<NumericT> & mat1,
            matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
            matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t /*len_beta*/,  bool reciprocal_beta,  bool flip_sign_beta)
{
  // ... (layout assert as in am() elided)

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;
 
  if (mat1.row_major())
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
 
  else
  {
    // ... (wrapper setup elided)
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
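
// Usage sketch (illustrative): ambm_m() is the accumulating variant,
// mat1 += alpha * mat2 + beta * mat3, reached by statements such as
//
//   A += NumericT(2) * B + NumericT(3) * C;
//
// It mirrors ambm() except that every loop nest adds to wrapper_A instead of
// overwriting it.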
 
template<typename NumericT>
void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type    alpha = static_cast<value_type>(s);

  // ... (sizes elided; for clear == true the padded internal sizes are used so
  //      that the padding entries are written as well)

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
  }
}
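
// Usage sketch (illustrative): matrix_assign() sets every entry to the same value,
// e.g.
//
//   viennacl::linalg::matrix_assign(A, NumericT(1));   // A(i,j) = 1 for all i, j
//
// viennacl::matrix<T>::clear() maps here with s == 0 and clear == true (assumed
// dispatch).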
 
template<typename NumericT>
void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type    alpha = static_cast<value_type>(s);

  // ... (sizes and wrapper setup elided)

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
}
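
// Usage sketch (illustrative): only the diagonal is touched, so initializations like
//
//   A = viennacl::identity_matrix<NumericT>(A.size1());
//
// can be realized as matrix_assign(A, 0) followed by matrix_diagonal_assign(A, 1)
// (assumed decomposition by the high-level layer).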
 
template<typename NumericT>
void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT> & mat)
{
  typedef NumericT value_type;

  value_type       * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (bookkeeping elided; row_start/col_start select the k-th diagonal)

  if (mat.row_major())
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
  else
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < size; ++i)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  }
}
 
template<typename NumericT>
void matrix_diag_to_vector(const matrix_base<NumericT> & mat, int k, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (bookkeeping elided)

  if (mat.row_major())
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
  }
  else
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < size; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
  }
}
 
template<typename NumericT>
void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (bookkeeping elided)

  if (mat.row_major())
  {
    // ... (wrapper setup elided)
    for (vcl_size_t j = 0; j < A_size2; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
  else
  {
    // ... (wrapper setup elided)
    for (vcl_size_t j = 0; j < A_size2; ++j)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  }
}
 
template<typename NumericT>
void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (bookkeeping elided)

  if (mat.row_major())
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < A_size1; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
  else
  {
    // ... (wrapper setup elided)
    for (vcl_size_t i = 0; i < A_size1; ++i)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  }
}
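
// Usage sketch (illustrative): these helpers back the high-level accessors,
// e.g. (assumed mapping)
//
//   viennacl::vector<NumericT> d(n);
//   viennacl::linalg::matrix_diag_to_vector(A, 0, d);   // d = diag(A); k > 0 selects
//                                                       // super-, k < 0 subdiagonals
//   viennacl::linalg::matrix_row(A, 2u, d);             // d = row 2 of A
//   viennacl::linalg::matrix_column(A, 3u, d);          // d = column 3 of A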
 
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_binary<OpT> > OpFunctor;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());

  // ... (bookkeeping and wrapper setup elided)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
}
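
// Usage sketch (illustrative): OpFunctor::apply(a, b, c) writes op(b, c) into a,
// so MATLAB-style element-wise expressions such as
//
//   A = viennacl::linalg::element_prod(B, C);   // A = B .* C
//   A = viennacl::linalg::element_div(B, C);    // A = B ./ C
//
// reach this loop nest with the corresponding binary operation tag for OpT.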
 
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;
  typedef viennacl::linalg::detail::op_applier<op_element_unary<OpT> > OpFunctor;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());

  // ... (bookkeeping and wrapper setup elided)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
}
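
// Usage sketch (illustrative): the unary variant serves expressions like
//
//   A = viennacl::linalg::element_sqrt(B);   // A(i,j) = sqrt(B(i,j))
//
// where OpT carries the unary function tag.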
 
template<typename NumericT>
void prod_impl(const matrix_base<NumericT> & mat, bool trans,
               const vector_base<NumericT> & vec,
                     vector_base<NumericT> & result)
{
  typedef NumericT value_type;

  value_type const * data_A      = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_x      = detail::extract_raw_pointer<value_type>(vec);
  value_type       * data_result = detail::extract_raw_pointer<value_type>(result);

  // ... (start/stride/size bookkeeping elided)
 
  if (mat.row_major())
  {
    if (trans)   // result = trans(mat) * vec: accumulate into per-thread buffers
    {
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
        thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size2*thread_count, 0);
      // ... (result wrapper setup elided)

      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_res(col) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size1 * id) / thread_count;
        vcl_size_t end   = (A_size1 * (id + 1)) / thread_count;

        // ... (matrix/vector wrappers over the row range [begin, end) elided)
        for (vcl_size_t row = 0; row < (end - begin); ++row)
        {
          value_type temp = wrapper_vec(row);
          for (vcl_size_t col = 0; col < A_size2; ++col)
            temp_array[A_size2 * id + col] += wrapper_mat(row, col) * temp;
        }
      }

      // sum up the thread-local partial results:
      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_res(col) += temp_array[A_size2 * id + col];
    }
    else   // result = mat * vec: one dot product per row
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
 
  else
  {
    if (!trans)   // result = mat * vec: traverse columns, accumulate per-thread partial results
    {
      vcl_size_t thread_count = 1;
#ifdef VIENNACL_WITH_OPENMP
      if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
        thread_count = omp_get_max_threads();
#endif
      std::vector<value_type> temp_array(A_size1*thread_count, 0);
      // ... (result wrapper setup elided)

      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_res(row) = 0;

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      {
        vcl_size_t id = 0;
#ifdef VIENNACL_WITH_OPENMP
        id = omp_get_thread_num();
#endif
        vcl_size_t begin = (A_size2 * id) / thread_count;
        vcl_size_t end   = (A_size2 * (id + 1)) / thread_count;

        // ... (matrix/vector wrappers over the column range [begin, end) elided)
        for (vcl_size_t col = 0; col < (end - begin); ++col)
        {
          value_type temp = wrapper_vec(col);
          for (vcl_size_t row = 0; row < A_size1; ++row)
            temp_array[A_size1 * id + row] += wrapper_mat(row, col) * temp;
        }
      }

      for (vcl_size_t id = 0; id < thread_count; ++id)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_res(row) += temp_array[A_size1 * id + row];
    }
    else   // result = trans(mat) * vec: one dot product per column of mat
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size2); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size1; ++col)
          temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];

        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
}
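
// Usage sketch (illustrative): both y = A * x and y = trans(A) * x dispatch here:
//
//   viennacl::vector<NumericT> y = viennacl::linalg::prod(A, x);
//   viennacl::vector<NumericT> z = viennacl::linalg::prod(viennacl::trans(A), x);
//
// Whenever the fast index of the storage layout does not match the reduction
// direction, the kernel switches to per-thread scratch buffers (temp_array) so
// each thread still streams through memory contiguously; the partial sums are
// reduced at the end.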
 
  template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
  void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
            vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
            NumericT alpha, NumericT beta)
  {
    if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
      return;

    static const vcl_size_t blocksize = 64;   // tile edge length (assumed; definition elided in this excerpt)

    vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
    vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
    vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((C_size1*C_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long block_idx_i2=0; block_idx_i2<static_cast<long>(num_blocks_C1); ++block_idx_i2)
    {
      // thread-local buffers: A-tile row-major, B-tile column-major, C-tile row-major
      std::vector<NumericT> buffer_A(blocksize * blocksize);
      std::vector<NumericT> buffer_B(blocksize * blocksize);
      std::vector<NumericT> buffer_C(blocksize * blocksize);

      vcl_size_t block_idx_i = static_cast<vcl_size_t>(block_idx_i2);
      vcl_size_t offset_i = block_idx_i*blocksize;

      for (vcl_size_t block_idx_j=0; block_idx_j<num_blocks_C2; ++block_idx_j)
      {
        vcl_size_t offset_j = block_idx_j*blocksize;
        std::fill(buffer_C.begin(), buffer_C.end(), NumericT(0));

        for (vcl_size_t block_idx_k=0; block_idx_k<num_blocks_A2; ++block_idx_k)
        {
          vcl_size_t offset_k = block_idx_k*blocksize;
          std::fill(buffer_A.begin(), buffer_A.end(), NumericT(0));
          std::fill(buffer_B.begin(), buffer_B.end(), NumericT(0));

          // load the current tiles (fringes stay zero-padded):
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
              buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);

          for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
            for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
              buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);

          // multiply the tiles (the flop hot spot): one contiguous dot product per entry
          for (vcl_size_t i = 0; i < blocksize; ++i)
          {
            NumericT const * ptrA = &(buffer_A[i*blocksize]);
            for (vcl_size_t j = 0; j < blocksize; ++j)
            {
              NumericT const * ptrB = &(buffer_B[j*blocksize]);

              NumericT temp = NumericT(0);
              for (vcl_size_t k = 0; k < blocksize; ++k)
                temp += ptrA[k] * ptrB[k];

              buffer_C[i*blocksize + j] += temp;
            }
          }
        }

        // write the tile back, honoring beta:
        if (beta > 0 || beta < 0)
        {
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
              C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }
        else
        {
          for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
            for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
              C(i,j) =                 alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
        }
      }
    }
  }
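
// Design note (not in the source): copying each tile of A into a row-major buffer
// and each tile of B into a column-major buffer turns the innermost k-loop into a
// dot product over two contiguous arrays, which vectorizes well. Zero-padding the
// buffers lets the multiply always run over full blocksize x blocksize tiles, and
// the beta test (beta > 0 || beta < 0, i.e. beta != 0 without floating-point
// equality warnings) skips reading C when it would only be scaled by zero.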
 
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
               const matrix_base<NumericT> & B, bool trans_B,
               matrix_base<NumericT> & C,
               ScalarT1 alpha, ScalarT2 beta)
{
  typedef NumericT value_type;

  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
  value_type       * data_C = detail::extract_raw_pointer<value_type>(C);

  // ... (start/stride/size bookkeeping elided)

  if (!trans_A && !trans_B)
  {
    // Eight cases: each of A, B, C may be row- or column-major. Every case sets up
    // matrix_array_wrapper objects with the matching layout tags and calls the
    // blocked kernel; only the wrapper types differ:
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
    // (the call is repeated verbatim for the remaining seven layout combinations)
  }
  else if (!trans_A && trans_B)
  {
    // same eight layout cases; wrapper_B is constructed with transposed index order
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (trans_A && !trans_B)
  {
    // wrapper_A transposed; the inner dimension becomes A_size1 since A enters transposed
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else // trans_A && trans_B
  {
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
}
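
// Usage sketch (illustrative): C = prod(A, B) and its transposed variants all
// funnel into detail::prod() above:
//
//   C = viennacl::linalg::prod(A, B);                      // !trans_A && !trans_B
//   C = viennacl::linalg::prod(viennacl::trans(A), B);     //  trans_A && !trans_B
//
// Dispatching on (trans_A, trans_B) and the storage layouts at this level keeps
// the blocked kernel itself free of per-element branching.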
 
template<typename NumericT, typename ScalarT>
void scaled_rank_1_update(matrix_base<NumericT> & mat1,
                          ScalarT const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
                          const vector_base<NumericT> & vec1,
                          const vector_base<NumericT> & vec2)
{
  typedef NumericT value_type;

  value_type       * data_A  = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);

  // ... (start/stride/size bookkeeping elided)

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;
 
  if (mat1.row_major())
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] / data_alpha;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type value_v1 = data_v1[static_cast<vcl_size_t>(row) * inc1 + start1] * data_alpha;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
      }
    }
  }
 
  else
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
      {
        value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] / data_alpha;
        for (vcl_size_t row = 0; row < A_size1; ++row)
          data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
      {
        value_type value_v2 = data_v2[static_cast<vcl_size_t>(col) * inc2 + start2] * data_alpha;
        for (vcl_size_t row = 0; row < A_size1; ++row)
          data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, static_cast<vcl_size_t>(col) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
      }
    }
  }
}
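
// Usage sketch (illustrative): the high-level rank-1 update
//
//   A += alpha * viennacl::linalg::outer_prod(v1, v2);   // A(i,j) += alpha * v1(i) * v2(j)
//
// lands here. Note how the scalar is folded into whichever vector matches the
// outer loop (v1 for row-major, v2 for column-major), so the multiply or divide
// by alpha happens once per outer iteration instead of once per matrix entry.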
 
template <typename NumericT, typename S1>
void bidiag_pack_impl(matrix_base<NumericT> & A, vector_base<S1> & D, vector_base<S1> & S)
{
  // Stores the diagonal of A in D and the superdiagonal of A in S.
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
  value_type * data_S = detail::extract_raw_pointer<value_type>(S);

  // ... (bookkeeping elided; size is the length of the diagonal)

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_D[start1 + inc1 * i]       = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for if ((size1*size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_D[start1 + inc1 * i]       = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
}
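
// Usage note (illustrative): after packing, D holds the diagonal entries A(i,i)
// for i = 0..size-1 and S the superdiagonal A(i,i+1) shifted by one position
// (S[0] is left untouched); this packed form is what the SVD/eigenvalue routines
// below consume.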
 
 template <typename NumericT, typename VectorType>
 void bidiag_pack(matrix_base<NumericT> & A, VectorType & dh, VectorType & sh)
 {
   // ... (forwards to bidiag_pack_impl() on the vector_base representations; body elided)
 }
 
 template <typename NumericT>
 void house_update_A_left(matrix_base<NumericT> & A, vector_base<NumericT> & D, vcl_size_t start)
 {
   // Applies the Householder reflection P = I - 2*v*v^T from the left, A <- P * A,
   // with the reflector v taken from D.
   typedef NumericT value_type;

   value_type * data_A = detail::extract_raw_pointer<value_type>(A);
   value_type * data_D = detail::extract_raw_pointer<value_type>(D);

   // ... (bookkeeping elided; row_start is derived from the 'start' parameter)

   if (A.row_major())
   {
     for (vcl_size_t i = 0; i < A_size2; i++)   // for each column i of A
     {
       // ss = v^T * A(:, i)
       value_type ss = 0;
       for (vcl_size_t j = row_start; j < A_size1; j++)
           ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index(j * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

       // A(:, i) <- A(:, i) - 2 * v * ss
#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
           data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
               data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
               (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
     }
   }
   else
   {
     for (vcl_size_t i = 0; i < A_size2; i++)
     {
       value_type ss = 0;
       for (vcl_size_t j = row_start; j < A_size1; j++)
           ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index(j * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
           data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
               data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
               (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
     }
   }
 }
 
 template <typename NumericT>
 void house_update_A_right(matrix_base<NumericT> & A, vector_base<NumericT> & D)
 {
   // Applies the Householder reflection from the right: A <- A * P.
   typedef NumericT value_type;

   value_type * data_A = detail::extract_raw_pointer<value_type>(A);
   value_type * data_D = detail::extract_raw_pointer<value_type>(D);

   // ... (bookkeeping elided)

   if (A.row_major())
   {
     for (vcl_size_t i = 0; i < A_size1; i++)   // for each row i of A
     {
       // sum_Av = A(i, :) * v
       value_type ss = 0;
       for (vcl_size_t j = 0; j < A_size2; j++)
           ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, j * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
       value_type sum_Av = ss;

#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long j = 0; j < static_cast<long>(A_size2); j++)
           data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
               data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
     }
   }
   else
   {
     for (vcl_size_t i = 0; i < A_size1; i++)
     {
       value_type ss = 0;
       for (vcl_size_t j = 0; j < A_size2; j++)
           ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, j * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
       value_type sum_Av = ss;

#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long j = 0; j < static_cast<long>(A_size2); j++)
           data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
               data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
     }
   }
 }
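
// Math note (not in the source): with P = I - 2*v*v^T and v the reflector stored
// in D, house_update_A_left computes, column by column, ss = v^T a and then
// a <- a - 2*v*ss, i.e. A <- P * A; house_update_A_right does the symmetric
// row-wise update A <- A * P. Only the dot product is sequential; the axpy-style
// update is the parallelized loop.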
 
 template <typename NumericT>
 void house_update_QL(matrix_base<NumericT> & Q, vector_base<NumericT> & D, vcl_size_t A_size1)
 {
   // Updates the matrix Q, which is needed for the computation of the eigenvectors.
   // (Body elided in this excerpt.)
 }
 
template<typename NumericT>
void givens_next(matrix_base<NumericT> & Q, vector_base<NumericT> & tmp1, vector_base<NumericT> & tmp2, int l, int m)
{
   // Updates Q by the Givens rotations of the current tql2 iteration; tmp1/tmp2
   // hold the rotation coefficients.
   typedef NumericT value_type;

   value_type * data_Q    = detail::extract_raw_pointer<value_type>(Q);
   value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
   value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);

   // ... (bookkeeping elided)

   if (Q.row_major())
   {
     for (int i = m - 1; i >= l; i--)   // rotations are applied in reverse order
     {
#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long k = 0; k < static_cast<long>(Q_size1); k++)
       {
         // ... (rotate the entries Q(k, i) and Q(k, i+1) using the coefficients
         //      from tmp1/tmp2; body elided)
       }
     }
   }
   else
   {
     for (int i = m - 1; i >= l; i--)
     {
#ifdef VIENNACL_WITH_OPENMP
       #pragma omp parallel for if ((Q_size1*Q_size1) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
       for (long k = 0; k < static_cast<long>(Q_size1); k++)
       {
         // ... (same rotation with column_major indexing; body elided)
       }
     }
   }
}
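
// Design note (assumption based on the loop structure): successive rotations
// along i depend on each other, so the i-loop stays sequential while the
// independent row index k of Q is the parallelized dimension.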
 
template <typename NumericT, typename S1>
void copy_vec(matrix_base<NumericT> & A, vector_base<S1> & V,
              vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
{
  // Copies a row or a column from a matrix to a vector.
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_V = detail::extract_raw_pointer<value_type>(V);

  // ... (bookkeeping elided)

  if (copy_col)
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
  else
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if ((A_size1*A_size2) > VIENNACL_OPENMP_MATRIX_MIN_SIZE)
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size2); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
}
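
// Usage sketch (illustrative): the eigenvalue/bidiagonalization code uses
// copy_vec to pull Householder columns or rows out of A, e.g.
//
//   copy_vec(A, V, j+1, j, true);    // V <- A(j+1 : size1-1, j)   (a column)
//   copy_vec(A, V, j, j+1, false);   // V <- A(j, j+1 : size2-1)   (a row)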
 