#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_

#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif
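
// Host-based (single-threaded or OpenMP-parallel) implementations of sparse
// matrix operations (matrix-vector products, matrix-matrix products, and
// triangular solves) for the compressed_matrix, coordinate_matrix, ell_matrix,
// sliced_ell_matrix and hyb_matrix types.
//
// row_info() for compressed_matrix (CSR): depending on info_selector, computes
// for each row either the largest absolute entry, the sum of absolute entries,
// the Euclidean norm, or the diagonal entry, and writes it to the result
// vector. In the CSR layout used throughout this file, the entries of row r
// occupy the index range [row_buffer[r], row_buffer[r+1]) of the 'elements'
// and 'col_buffer' arrays.
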
  template<typename NumericT, unsigned int AlignmentV>

    NumericT         * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
    NumericT   const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
    unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
    unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

      unsigned int row_end = row_buffer[row+1];

      switch (info_selector)

          for (unsigned int i = row_buffer[row]; i < row_end; ++i)
            value = std::max<NumericT>(value, std::fabs(elements[i]));

          for (unsigned int i = row_buffer[row]; i < row_end; ++i)
            value += std::fabs(elements[i]);

          for (unsigned int i = row_buffer[row]; i < row_end; ++i)
            value += elements[i] * elements[i];
          value = std::sqrt(value);

          for (unsigned int i = row_buffer[row]; i < row_end; ++i)
            if (col_buffer[i] == row)

      result_buf[row] = value;
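
// prod_impl() for compressed_matrix: sparse matrix-vector product
// result = alpha * mat * vec + beta * result, parallelized over the matrix
// rows with OpenMP. The test "beta < 0 || beta > 0" is a warning-free way of
// checking beta != 0, so the old result values are only read when they
// actually contribute.
//
// A minimal sketch of the per-row CSR kernel on plain arrays (illustration
// only; the stride()/start() bookkeeping of ViennaCL vectors is omitted):
//
//   for (vcl_size_t row = 0; row < num_rows; ++row)
//   {
//     NumericT dot_prod = 0;
//     for (unsigned int i = row_buffer[row]; i < row_buffer[row + 1]; ++i)
//       dot_prod += elements[i] * x[col_buffer[i]];
//     y[row] = alpha * dot_prod + beta * y[row];
//   }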
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long row = 0; row < static_cast<long>(mat.size1()); ++row)

      dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];

    if (beta < 0 || beta > 0)

      result_buf[index] = alpha * dot_prod + beta * result_buf[index];
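
// prod_impl() for compressed_matrix times a dense matrix_base:
// result = sp_mat * d_mat. The detail::matrix_array_wrapper objects below hide
// the row-major/column-major layout, starts, strides and internal padding of
// the dense operand and of the result, so the same index arithmetic works for
// all layout combinations. The outer loop is parallelized either over the
// sparse rows (row-major dense operand) or over the dense columns
// (column-major dense operand).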
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements   = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), col);

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_col(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), static_cast<vcl_size_t>(col));

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;
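
// Same operation for a transposed dense factor, result = sp_mat * trans(d_mat):
// the dense operand is reached through d_mat.lhs(), and the wrapper is indexed
// with swapped coordinates (col, sp_mat_col_buffer[k]) so that no explicit
// transpose of the dense matrix is ever formed.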
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements   = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const *  d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() ) {
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_col(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;
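
// prod_impl() for compressed_matrix times compressed_matrix (sparse
// matrix-matrix product C = A * B). The computation proceeds in stages:
//  1. For every row of A, an upper bound on the number of nonzeros of the
//     corresponding row of C is accumulated (capped at B.size2()); the maximum
//     over all rows sizes the per-thread scratch buffers, which are laid out
//     as three staging vectors of that maximal length per thread.
//  2. A symbolic pass (row_C_scan_symbolic_vector) determines the number of
//     nonzeros of each row of C; the exclusive-scan loop over C_row_buffer
//     then converts these counts in place into CSR row offsets, and
//     C.reserve() allocates the required number of nonzeros.
//  3. A numeric pass (row_C_scan_numeric_vector) fills C_col_buffer and
//     C_elements row by row.
// With OpenMP, rows are distributed with schedule(dynamic, chunk_size) to
// balance strongly varying row lengths; the scratch buffers are allocated
// with malloc() and released with free() at the end.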
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * A_elements   = detail::extract_raw_pointer<NumericT>(A.handle());
  unsigned int const * A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const * A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());

  NumericT     const * B_elements   = detail::extract_raw_pointer<NumericT>(B.handle());
  unsigned int const * B_row_buffer = detail::extract_raw_pointer<unsigned int>(B.handle1());
  unsigned int const * B_col_buffer = detail::extract_raw_pointer<unsigned int>(B.handle2());

  unsigned int * C_row_buffer = detail::extract_raw_pointer<unsigned int>(C.handle1());

#if defined(VIENNACL_WITH_OPENMP)
  unsigned int block_factor = 10;
  unsigned int max_threads = omp_get_max_threads();
  long chunk_size = long(A.size1()) / long(block_factor * max_threads) + 1;
#else
  unsigned int max_threads = 1;
#endif

  std::vector<unsigned int> max_length_row_C(max_threads);
  std::vector<unsigned int *> row_C_temp_index_buffers(max_threads);
  std::vector<NumericT *>     row_C_temp_value_buffers(max_threads);

#if defined(VIENNACL_WITH_OPENMP)
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i=0; i<long(A.size1()); ++i)

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A   = A_row_buffer[i+1];

    unsigned int row_C_upper_bound_row = 0;
    for (unsigned int j = row_start_A; j<row_end_A; ++j)

      unsigned int row_B = A_col_buffer[j];

      unsigned int entries_in_row = B_row_buffer[row_B+1] - B_row_buffer[row_B];
      row_C_upper_bound_row += entries_in_row;

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif

    max_length_row_C[thread_id] = std::max(max_length_row_C[thread_id], std::min(row_C_upper_bound_row, static_cast<unsigned int>(B.size2())));

  for (std::size_t i=1; i<max_length_row_C.size(); ++i)
    max_length_row_C[0] = std::max(max_length_row_C[0], max_length_row_C[i]);

  for (unsigned int i=0; i<max_threads; ++i)
    row_C_temp_index_buffers[i] = (unsigned int *)malloc(sizeof(unsigned int)*3*max_length_row_C[0]);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i=0; i<long(A.size1()); ++i)

    unsigned int thread_id = 0;
  #ifdef VIENNACL_WITH_OPENMP
    thread_id = omp_get_thread_num();
  #endif

    unsigned int buffer_len = max_length_row_C[0];

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + buffer_len;
    unsigned int *row_C_vector_3 = row_C_vector_2 + buffer_len;

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A   = A_row_buffer[i+1];

                                                 B_row_buffer, B_col_buffer, static_cast<unsigned int>(B.size2()),
                                                 row_C_vector_1, row_C_vector_2, row_C_vector_3);

  unsigned int current_offset = 0;
  for (std::size_t i=0; i<C.size1(); ++i)

    unsigned int tmp = C_row_buffer[i];
    C_row_buffer[i] = current_offset;
    current_offset += tmp;

  C_row_buffer[C.size1()] = current_offset;
  C.reserve(current_offset, false);

  for (unsigned int i=0; i<max_threads; ++i)
    row_C_temp_value_buffers[i] = (NumericT *)malloc(sizeof(NumericT)*3*max_length_row_C[0]);

  NumericT     * C_elements   = detail::extract_raw_pointer<NumericT>(C.handle());
  unsigned int * C_col_buffer = detail::extract_raw_pointer<unsigned int>(C.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, chunk_size)
#endif
  for (long i = 0; i < long(A.size1()); ++i)

    unsigned int row_start_A  = A_row_buffer[i];
    unsigned int row_end_A    = A_row_buffer[i+1];

    unsigned int row_C_buffer_start = C_row_buffer[i];
    unsigned int row_C_buffer_end   = C_row_buffer[i+1];

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + max_length_row_C[0];
    unsigned int *row_C_vector_3 = row_C_vector_2 + max_length_row_C[0];

    NumericT *row_C_vector_1_values = row_C_temp_value_buffers[thread_id];
    NumericT *row_C_vector_2_values = row_C_vector_1_values + max_length_row_C[0];
    NumericT *row_C_vector_3_values = row_C_vector_2_values + max_length_row_C[0];

                              B_row_buffer, B_col_buffer, B_elements, static_cast<unsigned int>(B.size2()),
                              row_C_buffer_start, row_C_buffer_end, C_col_buffer, C_elements,
                              row_C_vector_1, row_C_vector_1_values,
                              row_C_vector_2, row_C_vector_2_values,
                              row_C_vector_3, row_C_vector_3_values);

  for (unsigned int i=0; i<max_threads; ++i)

    free(row_C_temp_index_buffers[i]);
    free(row_C_temp_value_buffers[i]);
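
// detail::csr_inplace_solve(): in-place forward/backward substitution on the
// CSR arrays of a triangular matrix, one overload per solver tag:
//   unit_lower_tag  - forward substitution, diagonal assumed to be 1
//   lower_tag       - forward substitution, divides by the stored diagonal
//   unit_upper_tag  - backward substitution, diagonal assumed to be 1
//   upper_tag       - backward substitution, divides by the stored diagonal
// The upper variants traverse the rows in reverse order: row2 counts upwards
// while the actual row index runs from the last row downwards.
//
// A minimal sketch of forward substitution with unit diagonal on plain CSR
// arrays (illustration only):
//
//   for (vcl_size_t row = 1; row < n; ++row)
//   {
//     NumericT vec_entry = vec_buffer[row];
//     for (vcl_size_t i = row_buffer[row]; i < row_buffer[row + 1]; ++i)
//       if (col_buffer[i] < row)
//         vec_entry -= vec_buffer[col_buffer[i]] * element_buffer[i];
//     vec_buffer[row] = vec_entry;
//   }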
 
  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,

      for (vcl_size_t i = row_begin; i < row_end; ++i)

          vec_entry -= vec_buffer[col_index] * element_buffer[i];

      vec_buffer[row] = vec_entry;

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,

      for (vcl_size_t i = row_begin; i < row_end; ++i)

          vec_entry -= vec_buffer[col_index] * element_buffer[i];
        else if (col_index == row)
          diagonal_entry = element_buffer[i];

      vec_buffer[row] = vec_entry / diagonal_entry;

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,

    for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)

      for (vcl_size_t i = row_begin; i < row_end; ++i)

          vec_entry -= vec_buffer[col_index] * element_buffer[i];

      vec_buffer[row] = vec_entry;

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                         IndexArrayT const & col_buffer,
                         ConstScalarArrayT const & element_buffer,
                         ScalarArrayT & vec_buffer,

    for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)

      for (vcl_size_t i = row_begin; i < row_end; ++i)

          vec_entry -= vec_buffer[col_index] * element_buffer[i];
        else if (col_index == row)
          diagonal_entry = element_buffer[i];

      vec_buffer[row] = vec_entry / diagonal_entry;
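
// inplace_solve() overloads for a compressed_matrix and a vector right-hand
// side: they only extract the raw CSR arrays and forward to
// detail::csr_inplace_solve() with the corresponding tag (unit_lower_tag,
// lower_tag, unit_upper_tag, upper_tag).
//
// Typical use through the public interface might look as follows (sketch,
// assuming a compressed_matrix L and a vector x that are already set up):
//
//   viennacl::linalg::inplace_solve(L, x, viennacl::linalg::lower_tag());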
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
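
// detail::csr_trans_inplace_solve(): substitution with the *transpose* of a
// CSR matrix. Since the transpose is not stored explicitly, the routines walk
// the stored rows as if they were columns and scatter updates into the
// right-hand side: once vec_buffer[col] is final, every stored entry of that
// row removes its contribution from the corresponding later/earlier unknown.
// Again there is one overload per solver tag; the non-unit variants first
// search the current row for the diagonal entry and divide by it.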
 
  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,

    for (vcl_size_t col = 0; col < num_cols; ++col)

      NumericT vec_entry = vec_buffer[col];

      for (vcl_size_t i = col_begin; i < col_end; ++i)

        unsigned int row_index = col_buffer[i];

          vec_buffer[row_index] -= vec_entry * element_buffer[i];

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,

    for (vcl_size_t col = 0; col < num_cols; ++col)

      for (vcl_size_t i = col_begin; i < col_end; ++i)

        if (row_index == col)
          diagonal_entry = element_buffer[i];

      NumericT vec_entry = vec_buffer[col] / diagonal_entry;
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * element_buffer[i];

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,

    for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)

      NumericT vec_entry = vec_buffer[col];

      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * element_buffer[i];

  template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>

                               IndexArrayT const & col_buffer,
                               ConstScalarArrayT const & element_buffer,
                               ScalarArrayT & vec_buffer,

    for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)

      for (vcl_size_t i = col_begin; i < col_end; ++i)

        if (row_index == col)
          diagonal_entry = element_buffer[i];

      NumericT vec_entry = vec_buffer[col] / diagonal_entry;
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * element_buffer[i];
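
// block_inplace_solve() overloads for expressions trans(L) and trans(U) of a
// compressed_matrix (used, e.g., by block preconditioners): they perform the
// same column-wise scatter substitution directly on the CSR arrays of the
// untransposed factor. The non-unit variants obtain the diagonal from a
// separately supplied vector (L_diagonal / U_diagonal) instead of searching
// for it inside each row.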
 
  template<typename NumericT, unsigned int AlignmentV>

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
    NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
    NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)

      NumericT vec_entry = vec_buffer[col];

      for (vcl_size_t i = col_begin; i < col_end; ++i)

        unsigned int row_index = col_buffer[i];

          vec_buffer[row_index] -= vec_entry * elements[i];

  template<typename NumericT, unsigned int AlignmentV>

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
    NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
    NumericT     const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(L_diagonal.handle());
    NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)

      NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * elements[i];

  template<typename NumericT, unsigned int AlignmentV>

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
    NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
    NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)

      vcl_size_t col = (U.lhs().size1() - col2) - 1;

      NumericT vec_entry = vec_buffer[col];

      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * elements[i];

  template<typename NumericT, unsigned int AlignmentV>

    unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
    unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
    NumericT     const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
    NumericT     const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(U_diagonal.handle());
    NumericT           * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

    for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)

      vcl_size_t col = (U.lhs().size1() - col2) - 1;

      NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
      vec_buffer[col] = vec_entry;
      for (vcl_size_t i = col_begin; i < col_end; ++i)

          vec_buffer[row_index] -= vec_entry * elements[i];
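
// inplace_solve() overloads for matrix_expression proxies of the form
// trans(compressed_matrix): as in the non-transposed case, they merely extract
// the raw CSR arrays of proxy.lhs() and dispatch to
// detail::csr_trans_inplace_solve() with the requested solver tag.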
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);

template<typename NumericT, unsigned int AlignmentV>

  NumericT           * vec_buf    = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements   = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
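
// prod_impl() for compressed_compressed_matrix: a CSR-like format that stores
// only those rows which actually contain nonzeros; handle3() holds the indices
// of these nonempty rows. The result vector is first scaled by beta (or zeroed
// if beta is 0), and then alpha * mat * vec is accumulated for the stored rows
// only.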
 
template<typename NumericT>

  NumericT           * result_buf  = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf     = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements    = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * col_buffer  = detail::extract_raw_pointer<unsigned int>(mat.handle2());

  if (beta < 0 || beta > 0)

      result_buf[i * result.stride() + result.start()] *= beta;

      result_buf[i * result.stride() + result.start()] = 0;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)

    for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
      dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];

    if (beta > 0 || beta < 0)
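
// Routines for coordinate_matrix (COO format): the nonzeros are stored as
// (row, column) index pairs interleaved in a single buffer (handle12()), so
// entry i has row index coord_buffer[2*i] and column index coord_buffer[2*i+1].
// row_info() below relies on the entries being grouped by row: it accumulates
// a value while the row index stays the same and writes it out whenever a new
// row starts (and once more after the final entry).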
 
  template<typename NumericT, unsigned int AlignmentV>

    NumericT           * result_buf   = detail::extract_raw_pointer<NumericT>(vec.handle());
    NumericT     const * elements     = detail::extract_raw_pointer<NumericT>(mat.handle());
    unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

    unsigned int last_row = 0;

      unsigned int current_row = coord_buffer[2*i];

      if (current_row != last_row)

          value = std::sqrt(value);

        result_buf[last_row] = value;

        last_row = current_row;

      switch (info_selector)

          value = std::max<NumericT>(value, std::fabs(elements[i]));

          value += std::fabs(elements[i]);

          value += elements[i] * elements[i];

          if (coord_buffer[2*i+1] == current_row)
            value = elements[i];

      value = std::sqrt(value);

    result_buf[last_row] = value;
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * result_buf   = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf      = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements     = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

  if (beta < 0 || beta > 0)

      result_buf[i * result.stride() + result.start()] *= beta;

      result_buf[i * result.stride() + result.start()] = 0;

    result_buf[coord_buffer[2*i] * result.stride() + result.start()]
      += alpha * elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
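
// The coordinate_matrix prod_impl() above is a simple scatter over the
// nonzeros: result[row_i] += alpha * value_i * vec[col_i], after the result
// has been scaled by beta (or cleared). The next two overloads apply the same
// idea to a dense right-hand side matrix and to its transpose: each nonzero
// (r, c, x) updates an entire row of the result via
// result(r, col) += x * d_mat(c, col) for all columns col, with the memory
// layout handled by the matrix_array_wrapper helpers and the loops
// parallelized either over the nonzeros or over the dense columns.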
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords   = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        NumericT y = d_mat_wrapper_row( c, col);

          result_wrapper_row(r, col) += x * y;

          result_wrapper_col(r, col) += x * y;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

        NumericT y = d_mat_wrapper_col( c, col);

          result_wrapper_row( r, col) += x*y;

          result_wrapper_col( r, col) += x*y;
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements     = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_row(r, col) += x * y;

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_col(r, col) += x * y;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_row(r, col) += x * y;

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_col(r, col) += x * y;
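
// prod_impl() for ell_matrix: the ELL format stores a fixed number of entries
// per row (internal_maxnnz(), padded with zeros), which gives a regular memory
// layout at the price of explicit padding. The test "val > 0 || val < 0"
// skips the padding entries so they contribute nothing to the accumulated sum.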
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * result_buf   = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf      = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements     = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords       = detail::extract_raw_pointer<unsigned int>(mat.handle2());

    for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)

      if (val > 0 || val < 0)

        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);

    if (beta < 0 || beta > 0)

      result_buf[index] = alpha * sum + beta * result_buf[index];
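
// ell_matrix times a dense matrix and times a transposed dense matrix: as for
// the CSR and COO variants, one loop nest per combination of sparse row /
// dense column ordering, with the padded ELL entries skipped via the
// "sp_mat_val < 0 || sp_mat_val > 0" test and the dense operand accessed
// through the layout-hiding wrappers.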
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements     = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)

        if (sp_mat_val < 0 || sp_mat_val > 0)

              result_wrapper_row(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);

              result_wrapper_col(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

        for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

          if (sp_mat_val < 0 || sp_mat_val > 0)

              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);

              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT     const * sp_mat_elements     = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords       = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  if ( d_mat.lhs().row_major() )

#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

    for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

          if (sp_mat_val < 0 || sp_mat_val > 0)

              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);

              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {

        if (sp_mat_val < 0 || sp_mat_val > 0)

            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_row( row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);

            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_col( row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
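
// prod_impl() for sliced_ell_matrix: rows are processed in blocks of
// rows_per_block() consecutive rows. For each block, columns_per_block[block]
// gives the padded number of entries per row within that block and
// block_start[block] the offset of the block's data; a small local array
// (result_values) accumulates one partial result per row of the block before
// it is written back, scaled by alpha and combined with beta * result where
// requested.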
 
template<typename NumericT, typename IndexT>

  NumericT       * result_buf        = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf           = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements          = detail::extract_raw_pointer<NumericT>(mat.handle());
  IndexT   const * columns_per_block = detail::extract_raw_pointer<IndexT>(mat.handle1());
  IndexT   const * column_indices    = detail::extract_raw_pointer<IndexT>(mat.handle2());
  IndexT   const * block_start       = detail::extract_raw_pointer<IndexT>(mat.handle3());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long block_idx2 = 0; block_idx2 < static_cast<long>(num_blocks); ++block_idx2)

    vcl_size_t current_columns_per_block = columns_per_block[block_idx];

    for (IndexT column_entry_index = 0;
                column_entry_index < current_columns_per_block;
              ++column_entry_index)

      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)

        NumericT val = elements[stride_start + row_in_block];
        result_values[row_in_block] += (val > 0 || val < 0) ? vec_buf[column_indices[stride_start + row_in_block] * vec.stride() + vec.start()] * val : 0;

    if (beta < 0 || beta > 0)

      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)

        if (first_row_in_matrix + row_in_block < result.size())

          result_buf[index] = alpha * result_values[row_in_block] + beta * result_buf[index];

      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)

        if (first_row_in_matrix + row_in_block < result.size())
          result_buf[(first_row_in_matrix + row_in_block) * result.stride() + result.start()] = alpha * result_values[row_in_block];
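
// prod_impl() for hyb_matrix: the hybrid format keeps a regular ELL part
// (handle(), handle2()) for the typical number of entries per row and spills
// the remaining entries of longer rows into a CSR part (handle3(), handle4(),
// handle5()). Each row therefore accumulates an ELL contribution and a CSR
// contribution before alpha and beta are applied.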
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT           * result_buf     = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT     const * vec_buf        = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

    for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

      if (val > 0 || val < 0)

        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);

    for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)

        sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);

    if (beta < 0 || beta > 0)

      result_buf[index] = alpha * sum + beta * result_buf[index];
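
// The final two overloads compute hyb_matrix times a dense matrix and times a
// transposed dense matrix: for every (row, result_col) pair the ELL part and
// the CSR part of the row are accumulated into 'sum', which is then written to
// the result through the layout-hiding wrapper.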
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)

      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

        if (val < 0 || val > 0)

            sum += d_mat_wrapper_row(col, result_col) * val;

            sum += d_mat_wrapper_col(col, result_col) * val;

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];

        result_wrapper_row(row, result_col) = sum;

        result_wrapper_col(row, result_col) = sum;
 
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * d_mat_data  = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT       * result_data = detail::extract_raw_pointer<NumericT>(result);

      d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
      result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
      result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT     const * elements       = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords         = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT     const * csr_elements   = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)

      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

        if (val < 0 || val > 0)

          if (d_mat.lhs().row_major())
            sum += d_mat_wrapper_row(result_col, col) * val;

            sum += d_mat_wrapper_col(result_col, col) * val;

      if (d_mat.lhs().row_major())
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];

        result_wrapper_row(row, result_col) = sum;

        result_wrapper_col(row, result_col) = sum;
 