1 #ifndef VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_HOST_BASED_ILU_OPERATIONS_HPP_ 
   41 #ifndef VIENNACL_OPENMP_ILU_MIN_SIZE 
   42   #define VIENNACL_OPENMP_ILU_MIN_SIZE  5000 
   52 template<
typename NumericT>
 
   58   unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
   59   unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
   60   NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.
handle());
 
   62   unsigned int       *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
   67 #ifdef VIENNACL_WITH_OPENMP 
   68     #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
   70   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
   72     unsigned int col_begin = A_row_buffer[
row];
 
   73     unsigned int col_end   = A_row_buffer[
row+1];
 
   75     for (
unsigned int j = col_begin; j < col_end; ++j)
 
   77       unsigned int col = A_col_buffer[j];
 
   90   unsigned int       *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
   91   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
   96 #ifdef VIENNACL_WITH_OPENMP 
   97     #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
   99   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  101     unsigned int col_begin = A_row_buffer[
row];
 
  102     unsigned int col_end   = A_row_buffer[
row+1];
 
  104     unsigned int index_L = L_row_buffer[
row];
 
  105     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  107       unsigned int col = A_col_buffer[j];
 
  110       if (
long(col) <= 
row)
 
  112         L_col_buffer[index_L] = col;
 
  113         L_elements[index_L]   = value;
 
  123 template<
typename NumericT>
 
  129   unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
  130   unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
  131   NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.
handle());
 
  133   NumericT           *D_elements   = detail::extract_raw_pointer<NumericT>(D.handle());
 
  138 #ifdef VIENNACL_WITH_OPENMP 
  139   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  141   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  143     unsigned int col_begin = A_row_buffer[
row];
 
  144     unsigned int col_end   = A_row_buffer[
row+1];
 
  146     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  148       unsigned int col = A_col_buffer[j];
 
  151         D_elements[
row] = 
NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
 
  160   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
  161   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
  162   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
  164 #ifdef VIENNACL_WITH_OPENMP 
  165   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  167   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  169     unsigned int col_begin = L_row_buffer[
row];
 
  170     unsigned int col_end   = L_row_buffer[
row+1];
 
  174     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  175       L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
 
  184 template<
typename NumericT>
 
  188   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
  189   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
  190   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
  192   NumericT           *aij_ptr   = detail::extract_raw_pointer<NumericT>(aij_L.
handle());
 
  198 #ifdef VIENNACL_WITH_OPENMP 
  199     #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  201   for (
long i = 0; i < static_cast<long>(L.
nnz()); ++i)
 
  202     L_backup[i] = L_elements[i];
 
  206 #ifdef VIENNACL_WITH_OPENMP 
  207     #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  209   for (
long row = 0; row < static_cast<long>(L.
size1()); ++
row)
 
  214     unsigned int row_Li_start = L_row_buffer[
row];
 
  215     unsigned int row_Li_end   = L_row_buffer[
row + 1];
 
  217     for (
unsigned int i = row_Li_start; i < row_Li_end; ++i)
 
  219       unsigned int col = L_col_buffer[i];
 
  221       unsigned int row_Lj_start = L_row_buffer[col];
 
  222       unsigned int row_Lj_end   = L_row_buffer[col+1];
 
  225       unsigned int index_Lj = row_Lj_start;
 
  226       unsigned int col_Lj = L_col_buffer[index_Lj];
 
  228       for (
unsigned int index_Li = row_Li_start; index_Li < i; ++index_Li)
 
  230         unsigned int col_Li = L_col_buffer[index_Li];
 
  233         while (col_Lj < col_Li)
 
  236           col_Lj = L_col_buffer[index_Lj];
 
  239         if (col_Lj == col_Li)
 
  240           s -= L_backup[index_Li] * L_backup[index_Lj];
 
  244         L_elements[i] = s / L_backup[row_Lj_end - 1]; 
 
  246         L_elements[i] = std::sqrt(s);
 
  257 template<
typename NumericT>
 
  264   unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
  265   unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
  266   NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.
handle());
 
  268   unsigned int       *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
  269   unsigned int       *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle1());
 
  274 #ifdef VIENNACL_WITH_OPENMP 
  275     #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  277   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  279     unsigned int col_begin = A_row_buffer[
row];
 
  280     unsigned int col_end   = A_row_buffer[
row+1];
 
  282     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  284       unsigned int col = A_col_buffer[j];
 
  285       if (
long(col) <= 
row)
 
  287       if (
long(col) >= 
row)
 
  303   unsigned int       *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
  304   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
  306   unsigned int       *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle2());
 
  307   NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U.
handle());
 
  312 #ifdef VIENNACL_WITH_OPENMP 
  313     #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  315   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  317     unsigned int col_begin = A_row_buffer[
row];
 
  318     unsigned int col_end   = A_row_buffer[
row+1];
 
  320     unsigned int index_L = L_row_buffer[
row];
 
  321     unsigned int index_U = U_row_buffer[
row];
 
  322     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  324       unsigned int col = A_col_buffer[j];
 
  327       if (
long(col) <= 
row)
 
  329         L_col_buffer[index_L] = col;
 
  330         L_elements[index_L]   = value;
 
  334       if (
long(col) >= 
row)
 
  336         U_col_buffer[index_U] = col;
 
  337         U_elements[index_U]   = value;
 
  348 template<
typename NumericT>
 
  355   unsigned int const *A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
  356   unsigned int const *A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
  357   NumericT     const *A_elements   = detail::extract_raw_pointer<NumericT>(A.
handle());
 
  359   NumericT           *D_elements   = detail::extract_raw_pointer<NumericT>(D.handle());
 
  364 #ifdef VIENNACL_WITH_OPENMP 
  365   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  367   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  369     unsigned int col_begin = A_row_buffer[
row];
 
  370     unsigned int col_end   = A_row_buffer[
row+1];
 
  372     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  374       unsigned int col = A_col_buffer[j];
 
  377         D_elements[
row] = 
NumericT(1) / std::sqrt(std::fabs(A_elements[j]));
 
  386   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
  387   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
  388   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
  390 #ifdef VIENNACL_WITH_OPENMP 
  391   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  393   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  395     unsigned int col_begin = L_row_buffer[
row];
 
  396     unsigned int col_end   = L_row_buffer[
row+1];
 
  400     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  401       L_elements[j] *= D_row * D_elements[L_col_buffer[j]];
 
  407   unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle1());
 
  408   unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle2());
 
  409   NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U.
handle());
 
  411 #ifdef VIENNACL_WITH_OPENMP 
  412   #pragma omp parallel for if (A.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  414   for (
long row = 0; row < static_cast<long>(A.
size1()); ++
row)
 
  416     unsigned int col_begin = U_row_buffer[
row];
 
  417     unsigned int col_end   = U_row_buffer[
row+1];
 
  421     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  422       U_elements[j] *= D_row * D_elements[U_col_buffer[j]];
 
  430 template<
typename NumericT>
 
  434   NumericT     const * A_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.
handle());
 
  435   unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
  436   unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
  441   NumericT     * B_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(B.handle());
 
  442   unsigned int * B_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle1());
 
  443   unsigned int * B_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(B.handle2());
 
  446   for (std::size_t i = 0; i < B.size1(); ++i)
 
  454     unsigned int row_start = A_row_buffer[
row];
 
  455     unsigned int row_stop  = A_row_buffer[
row+1];
 
  457     for (
unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
 
  458       B_row_buffer[A_col_buffer[nnz_index]] += 1;
 
  462   unsigned int offset = B_row_buffer[0];
 
  464   for (std::size_t 
row = 1; 
row < B.size1(); ++
row)
 
  466     unsigned int tmp = B_row_buffer[
row];
 
  467     B_row_buffer[
row] = offset;
 
  470   B_row_buffer[B.size1()] = offset;
 
  476   std::vector<unsigned int> B_row_offsets(B.size1()); 
 
  478   for (
unsigned int row = 0; row < static_cast<unsigned int>(A.
size1()); ++
row)
 
  481     unsigned int row_start = A_row_buffer[
row];
 
  482     unsigned int row_stop  = A_row_buffer[
row+1];
 
  484     for (
unsigned int nnz_index = row_start; nnz_index < row_stop; ++nnz_index)
 
  486       unsigned int col_in_A = A_col_buffer[nnz_index];
 
  487       unsigned int B_nnz_index = B_row_buffer[col_in_A] + B_row_offsets[col_in_A];
 
  488       B_col_buffer[B_nnz_index] = 
row;
 
  489       B_elements[B_nnz_index] = A_elements[nnz_index];
 
  490       ++B_row_offsets[col_in_A];
 
  502 template<
typename NumericT>
 
  508   unsigned int const *L_row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
 
  509   unsigned int const *L_col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
 
  510   NumericT           *L_elements   = detail::extract_raw_pointer<NumericT>(L.
handle());
 
  512   NumericT     const *aij_L_ptr    = detail::extract_raw_pointer<NumericT>(aij_L.
handle());
 
  514   unsigned int const *U_row_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.
handle1());
 
  515   unsigned int const *U_col_buffer = detail::extract_raw_pointer<unsigned int>(U_trans.
handle2());
 
  516   NumericT           *U_elements   = detail::extract_raw_pointer<NumericT>(U_trans.
handle());
 
  518   NumericT     const *aij_U_trans_ptr = detail::extract_raw_pointer<NumericT>(aij_U_trans.
handle());
 
  525 #ifdef VIENNACL_WITH_OPENMP 
  526     #pragma omp parallel for if (L.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  528   for (
long i = 0; i < static_cast<long>(L.
nnz()); ++i)
 
  529     L_backup[i] = L_elements[i];
 
  531 #ifdef VIENNACL_WITH_OPENMP 
  532     #pragma omp parallel for if (U_trans.nnz() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  534   for (
long i = 0; i < static_cast<long>(U_trans.
nnz()); ++i)
 
  535     U_backup[i] = U_elements[i];
 
  538 #ifdef VIENNACL_WITH_OPENMP 
  539     #pragma omp parallel for if (L.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  541   for (
long row = 0; row < static_cast<long>(L.
size1()); ++
row)
 
  546     unsigned int row_L_start = L_row_buffer[
row];
 
  547     unsigned int row_L_end   = L_row_buffer[
row + 1];
 
  549     for (
unsigned int j = row_L_start; j < row_L_end; ++j)
 
  551       unsigned int col = L_col_buffer[j];
 
  556       unsigned int row_U_start = U_row_buffer[col];
 
  557       unsigned int row_U_end   = U_row_buffer[col + 1];
 
  560       unsigned int index_U = row_U_start;
 
  561       unsigned int col_U = (index_U < row_U_end) ? U_col_buffer[index_U] : static_cast<unsigned int>(U_trans.
size2());
 
  563       for (
unsigned int k = row_L_start; k < j; ++k)
 
  565         unsigned int col_L = L_col_buffer[k];
 
  568         while (col_U < col_L)
 
  571           col_U = U_col_buffer[index_U];
 
  575           sum += L_backup[k] * U_backup[index_U];
 
  579       assert(U_col_buffer[row_U_end - 1] == col && 
bool(
"Accessing U element which is not a diagonal element!"));
 
  580       L_elements[j] = (aij_L_ptr[j] - 
sum) / U_backup[row_U_end - 1];  
 
  587     unsigned int row_U_start = U_row_buffer[
row];
 
  588     unsigned int row_U_end   = U_row_buffer[
row + 1];
 
  589     for (
unsigned int j = row_U_start; j < row_U_end; ++j)
 
  591       unsigned int col = U_col_buffer[j];
 
  593       row_L_start = L_row_buffer[col];
 
  594       row_L_end   = L_row_buffer[col + 1];
 
  597       unsigned int index_L = row_L_start;
 
  598       unsigned int col_L = (index_L < row_L_end) ? L_col_buffer[index_L] : static_cast<unsigned int>(L.
size1());
 
  600       for (
unsigned int k = row_U_start; k < j; ++k)
 
  602         unsigned int col_U = U_col_buffer[k];
 
  605         while (col_L < col_U)
 
  608           col_L = L_col_buffer[index_L];
 
  612           sum += L_backup[index_L] * U_backup[k];
 
  616       U_elements[j] = aij_U_trans_ptr[j] - 
sum;
 
  625 template<
typename NumericT>
 
  629   unsigned int *R_row_buffer = detail::extract_raw_pointer<unsigned int>(R.
handle1());
 
  630   unsigned int *R_col_buffer = detail::extract_raw_pointer<unsigned int>(R.
handle2());
 
  631   NumericT     *R_elements   = detail::extract_raw_pointer<NumericT>(R.
handle());
 
  633   NumericT     *diag_R_ptr   = detail::extract_raw_pointer<NumericT>(diag_R.
handle());
 
  635 #ifdef VIENNACL_WITH_OPENMP 
  636     #pragma omp parallel for if (R.size1() > VIENNACL_OPENMP_ILU_MIN_SIZE) 
  638   for (
long row = 0; row < static_cast<long>(R.
size1()); ++
row)
 
  640     unsigned int col_begin = R_row_buffer[
row];
 
  641     unsigned int col_end   = R_row_buffer[
row+1];
 
  645     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  647       unsigned int col = R_col_buffer[j];
 
  650         diag = R_elements[j];
 
  657     assert((diag > 0 || diag < 0) && 
bool(
"Zero diagonal detected!"));
 
  660     for (
unsigned int j = col_begin; j < col_end; ++j)
 
  661       R_elements[j] /= -diag;
 
const vcl_size_t & size2() const 
Returns the number of columns. 
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector. 
Generic size and resize functionality for different vector and matrix types. 
Implementations of vector operations. 
const vcl_size_t & size1() const 
Returns the number of rows. 
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
void icc_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > &aij_L)
Performs one nonlinear relaxation step in the Chow-Patel-ICC using OpenMP (cf. Algorithm 3 in paper...
This file provides the forward declarations for the main types used within ViennaCL. 
Determines row and column increments for matrices and matrix proxies. 
void extract_LU(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
const handle_type & handle() const 
Returns the OpenCL handle to the matrix entry array. 
const handle_type & handle1() const 
Returns the OpenCL handle to the row index array. 
const vcl_size_t & nnz() const 
Returns the number of nonzero entries. 
void ilu_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L, compressed_matrix< NumericT > &U)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
void generate_row_block_information()
Builds the row block information needed for fast sparse matrix-vector multiplications. 
void ilu_form_neumann_matrix(compressed_matrix< NumericT > &R, vector< NumericT > &diag_R)
void extract_L(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
const handle_type & handle2() const 
Returns the OpenCL handle to the column index array. 
Common routines for single-threaded or OpenMP-enabled execution on CPU. 
void icc_scale(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &L)
Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L a...
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void reserve(vcl_size_t new_nonzeros, bool preserve=true)
Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
void ilu_transpose(compressed_matrix< NumericT > const &A, compressed_matrix< NumericT > &B)
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan. 
Implementation of the ViennaCL scalar class. 
const handle_type & handle() const 
Returns the memory handle. 
void ilu_chow_patel_sweep(compressed_matrix< NumericT > &L, vector< NumericT > const &aij_L, compressed_matrix< NumericT > &U_trans, vector< NumericT > const &aij_U_trans)
Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenMP (cf. Algorithm 2 in paper) ...
Simple enable-if variant that uses the SFINAE pattern.