1 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ 
    2 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ 
   42   template<
typename VectorT, 
typename NumericT, 
typename SizeT = vcl_
size_t>
 
   49                     ) : vec_(v), start_(start_index), size_(vec_size) {}
 
   53       assert(index < size_ && 
bool(
"Index out of bounds!"));
 
   54       return vec_[start_ + index];
 
   59       assert(index < size_ && 
bool(
"Index out of bounds!"));
 
   60       return vec_[start_ + index];
 
   63     SizeT 
size()
 const { 
return size_; }
 
   78   template<
typename NumericT>
 
   89     NumericT     const * A_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.
handle());
 
   90     unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.
handle1());
 
   91     unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.
handle2());
 
   93     NumericT     * output_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.
handle());
 
   94     unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.
handle1());
 
   95     unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.
handle2());
 
  100       unsigned int buffer_col_start = A_row_buffer[
row];
 
  101       unsigned int buffer_col_end   = A_row_buffer[
row+1];
 
  103       output_row_buffer[
row - start_index] = 
static_cast<unsigned int>(output_counter);
 
  105       for (
unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index)
 
  107         unsigned int col = A_col_buffer[buf_index];
 
  108         if (col < start_index)
 
  111         if (col >= static_cast<unsigned int>(stop_index))
 
  114         output_col_buffer[output_counter] = 
static_cast<unsigned int>(col - start_index);
 
  115         output_elements[output_counter] = A_elements[buf_index];
 
  118       output_row_buffer[
row - start_index + 1] = 
static_cast<unsigned int>(output_counter);
 
  131 template<
typename MatrixT, 
typename ILUTag>
 
  134 typedef typename MatrixT::value_type      ScalarType;
 
  143                    ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks)
 
  146     block_indices_.resize(num_blocks);
 
  149       vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
 
  150       vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
 
  152       block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
 
  164                    ) : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.
size()), U_blocks(block_boundaries.
size())
 
  173   template<
typename VectorT>
 
  176     for (
vcl_size_t i=0; i<block_indices_.size(); ++i)
 
  177       apply_dispatch(vec, i, ILUTag());
 
  181   void init(MatrixT 
const & A)
 
  188     unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
 
  190 #ifdef VIENNACL_WITH_OPENMP 
  191     #pragma omp parallel for 
  193     for (
long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
 
  197       vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first;
 
  198       vcl_size_t block_nnz  = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first];
 
  206       init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_);
 
  231   template<
typename VectorT>
 
  234     detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].
size2());
 
  236     unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
 
  237     unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
 
  238     ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
 
  240     viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
 
  241     viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag());
 
  244   template<
typename VectorT>
 
  247     detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].
size2());
 
  250       unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1());
 
  251       unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2());
 
  252       ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
 
  254       viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
 
  258       unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1());
 
  259       unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2());
 
  260       ScalarType   const * elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle());
 
  262       viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag());
 
  268   std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks;
 
  269   std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks;
 
  280 template<
typename NumericT, 
unsigned int AlignmentV, 
typename ILUTagT>
 
  293                        block_indices_(num_blocks),
 
  294                        gpu_block_indices_(),
 
  298                        L_blocks_(num_blocks),
 
  299                        U_blocks_(num_blocks)
 
  302     block_indices_.resize(num_blocks);
 
  308       block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index);
 
  321                        block_indices_(block_boundaries),
 
  322                        gpu_block_indices_(),
 
  326                        L_blocks_(block_boundaries.
size()),
 
  327                        U_blocks_(block_boundaries.
size())
 
  352   void init(MatrixType 
const & A)
 
  359     unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
 
  361 #ifdef VIENNACL_WITH_OPENMP 
  362     #pragma omp parallel for 
  364     for (
long i=0; i<static_cast<long>(block_indices_.size()); ++i)
 
  367       vcl_size_t block_size = block_indices_[
static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first;
 
  368       vcl_size_t block_nnz  = row_buffer[block_indices_[
static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
 
  371       detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second);
 
  376       init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_);
 
  383     for (
vcl_size_t i=0; i<block_indices_.size(); ++i)
 
  385       block_indices_uint.
set(2*i,     block_indices_[i].first);
 
  386       block_indices_uint.set(2*i + 1, block_indices_[i].second);
 
  396   void blocks_to_device(MatrixType 
const & A)
 
  398     gpu_L_trans_.resize(A.size1(), A.size2());
 
  399     gpu_U_trans_.resize(A.size1(), A.size2());
 
  400     gpu_D_.resize(A.size1());
 
  402     unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1());
 
  403     unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1());
 
  408 #ifdef VIENNACL_WITH_OPENMP 
  409     #pragma omp parallel for 
  411     for (
long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
 
  415       unsigned int block_start = 
static_cast<unsigned int>(block_indices_[block_index].first);
 
  416       unsigned int block_stop  = 
static_cast<unsigned int>(block_indices_[block_index].second);
 
  418       unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
 
  419       unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
 
  422       std::fill(L_trans_row_buffer + block_start,
 
  423                 L_trans_row_buffer + block_stop,
 
  424                 static_cast<unsigned int>(0));
 
  429         unsigned int col_start = L_row_buffer[
row];
 
  430         unsigned int col_end   = L_row_buffer[
row+1];
 
  432         for (
unsigned int j = col_start; j < col_end; ++j)
 
  434           unsigned int col = L_col_buffer[j];
 
  435           if (col < static_cast<unsigned int>(
row))
 
  436             L_trans_row_buffer[col + block_start] += 1;
 
  442       unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
 
  443       unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
 
  446       std::fill(U_trans_row_buffer + block_start,
 
  447                 U_trans_row_buffer + block_stop,
 
  448                 static_cast<unsigned int>(0));
 
  453         unsigned int col_start = U_row_buffer[
row];
 
  454         unsigned int col_end   = U_row_buffer[
row+1];
 
  456         for (
unsigned int j = col_start; j < col_end; ++j)
 
  458           unsigned int col = U_col_buffer[j];
 
  460             U_trans_row_buffer[col + block_start] += 1;
 
  469     unsigned int current_value = 0;
 
  470     for (
vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i)
 
  472       unsigned int tmp = L_trans_row_buffer[i];
 
  473       L_trans_row_buffer[i] = current_value;
 
  474       current_value += tmp;
 
  476     gpu_L_trans_.reserve(current_value);
 
  479     for (
vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i)
 
  481       unsigned int tmp = U_trans_row_buffer[i];
 
  482       U_trans_row_buffer[i] = current_value;
 
  483       current_value += tmp;
 
  485     gpu_U_trans_.reserve(current_value);
 
  491     unsigned int       * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2());
 
  492     NumericT           * L_trans_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle());
 
  494     unsigned int       * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2());
 
  495     NumericT           * U_trans_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle());
 
  497     NumericT           * D_elements         = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle());
 
  499     std::vector<unsigned int> offset_L(gpu_L_trans_.size1());
 
  500     std::vector<unsigned int> offset_U(gpu_U_trans_.size1());
 
  502 #ifdef VIENNACL_WITH_OPENMP 
  503     #pragma omp parallel for 
  505     for (
long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2)
 
  508       unsigned int block_start = 
static_cast<unsigned int>(block_indices_[block_index].first);
 
  510       unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1());
 
  511       unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2());
 
  512       NumericT     const * L_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT    >(L_blocks_[block_index].handle());
 
  518         unsigned int col_start = L_row_buffer[
row];
 
  519         unsigned int col_end   = L_row_buffer[
row+1];
 
  521         for (
unsigned int j = col_start; j < col_end; ++j)
 
  523           unsigned int col = L_col_buffer[j];
 
  526             unsigned int row_trans = col + block_start;
 
  527             unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans];
 
  528             offset_L[row_trans] += 1;
 
  530             L_trans_col_buffer[k] = 
static_cast<unsigned int>(
row) + block_start;
 
  531             L_trans_elements[k]   = L_elements[j];
 
  536       unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1());
 
  537       unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2());
 
  538       NumericT     const * U_elements   = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT    >(U_blocks_[block_index].handle());
 
  543         unsigned int col_start = U_row_buffer[
row];
 
  544         unsigned int col_end   = U_row_buffer[
row+1];
 
  546         for (
unsigned int j = col_start; j < col_end; ++j)
 
  548           unsigned int row_trans = U_col_buffer[j] + block_start;
 
  549           unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans];
 
  551           if (row_trans == 
row + block_start) 
 
  553             D_elements[row_trans] = U_elements[j];
 
  555           else if (row_trans > 
row + block_start) 
 
  557             offset_U[row_trans] += 1;
 
  559             U_trans_col_buffer[k] = 
static_cast<unsigned int>(
row) + block_start;
 
  560             U_trans_elements[k]   = U_elements[j];
 
  603   std::vector<MatrixType> L_blocks_;
 
  604   std::vector<MatrixType> U_blocks_;
 
const vcl_size_t & size2() const 
Returns the number of columns. 
void fill(MatrixType &matrix, vcl_size_t row_index, vcl_size_t col_index, NumericT value)
Generic filler routine for setting an entry of a matrix to a particular value. 
Helper class implementing an array on the host. Default case: No conversion necessary. 
const vcl_size_t & size1() const 
Returns the number of rows. 
NumericT & operator[](SizeT index)
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
A tag for incomplete LU factorization with static pattern (ILU0) 
void precondition(viennacl::compressed_matrix< NumericT > &A, ilu0_tag const &)
Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices...
This file provides the forward declarations for the main types used within ViennaCL. 
Implementations of incomplete factorization preconditioners with static nonzero pattern. 
A block ILU preconditioner class, can be supplied to solve()-routines. 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
const handle_type & handle() const 
Returns the OpenCL handle to the matrix entry array. 
const handle_type & handle1() const 
Returns the OpenCL handle to the row index array. 
void extract_block_matrix(viennacl::compressed_matrix< NumericT > const &A, viennacl::compressed_matrix< NumericT > &diagonal_block_A, vcl_size_t start_index, vcl_size_t stop_index)
Extracts a diagonal block from a larger system matrix. 
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
std::vector< std::pair< vcl_size_t, vcl_size_t > > index_vector_type
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
A tag class representing an upper triangular matrix. 
A tag for incomplete LU factorization with threshold (ILUT) 
const handle_type & handle2() const 
Returns the OpenCL handle to the column index array. 
NumericT & operator()(SizeT index)
block_ilu_precond(MatrixT const &mat, ILUTag const &tag, vcl_size_t num_blocks=8)
ilu_vector_range(VectorT &v, SizeT start_index, SizeT vec_size)
std::vector< std::pair< vcl_size_t, vcl_size_t > > index_vector_type
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void apply(VectorT &vec) const 
block_ilu_precond(MatrixType const &mat, ILUTagT const &tag, index_vector_type const &block_boundaries)
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Implementations of an incomplete factorization preconditioner with threshold (ILUT) ...
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
block_ilu_precond(MatrixType const &mat, ILUTagT const &tag, vcl_size_t num_blocks=8)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void set(vcl_size_t index, U value)
A tag class representing a lower triangular matrix with unit diagonal. 
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Helper range class for representing a subvector of a larger buffer. 
Common routines used within ILU-type preconditioners. 
void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve=true)
Resize the matrix. 
void apply(vector< NumericT > &vec) const 
block_ilu_precond(MatrixT const &mat, ILUTag const &tag, index_vector_type const &block_boundaries)
memory_types get_active_handle_id() const 
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixType >::value >::type block_inplace_solve(const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::backend::mem_handle const &block_index_array, vcl_size_t num_blocks, viennacl::vector_base< ScalarType > const &mat_diagonal, viennacl::vector_base< ScalarType > &vec, SOLVERTAG tag)
void switch_memory_context(T &obj, viennacl::context new_ctx)
Generic convenience routine for migrating data of an object to a new memory domain.