1 #ifndef VIENNACL_LINALG_BICGSTAB_HPP_ 
    2 #define VIENNACL_LINALG_BICGSTAB_HPP_ 
   57     : tol_(tol), abs_tol_(0), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {}
 
   65   void abs_tolerance(
double new_tol) { 
if (new_tol >= 0) abs_tol_ = new_tol; }
 
   77   double error()
 const { 
return last_error_; }
 
   79   void error(
double e)
 const { last_error_ = e; }
 
   89   mutable double last_error_;
 
   97   template<
typename MatrixT, 
typename NumericT>
 
  103                                              void *monitor_data = NULL)
 
  124     std::vector<NumericT>      host_inner_prod_buffer(inner_prod_buffer.
size());
 
  130     NumericT residual_norm = norm_rhs_host;
 
  131     inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;
 
  149                                                 inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector);
 
  169                                                     inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector);
 
  176                                                 inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector);
 
  182       typedef typename std::vector<NumericT>::difference_type       difference_type;
 
  184        r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(),                                               host_inner_prod_buffer.begin() + difference_type(    buffer_size_per_vector), 
NumericT(0));
 
  185       As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + difference_type(    buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), 
NumericT(0));
 
  186       As_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), 
NumericT(0));
 
  187       Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), 
NumericT(0));
 
  188       As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), 
NumericT(0));
 
  189        s_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(6 * buffer_size_per_vector), 
NumericT(0));
 
  191       alpha =   r_dot_r0 / Ap_dot_r0;
 
  192       beta  = - As_dot_r0 / Ap_dot_r0;
 
  193       omega =   As_dot_s  / As_dot_As;
 
  195       residual_norm = std::sqrt(s_dot_s - 
NumericT(2.0) * omega * As_dot_s + omega * omega *  As_dot_As);
 
  196       if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
 
  208                                                           r0star, inner_prod_buffer, buffer_size_per_vector);
 
  212     tag.
error(residual_norm / norm_rhs_host);
 
  218   template<
typename NumericT>
 
  224                                         void *monitor_data = NULL)
 
  231   template<
typename NumericT>
 
  237                                         void *monitor_data = NULL)
 
  245   template<
typename NumericT>
 
  251                                         void *monitor_data = NULL)
 
  259   template<
typename NumericT>
 
  265                                         void *monitor_data = NULL)
 
  272   template<
typename NumericT>
 
  278                                         void *monitor_data = NULL)
 
  295   template<
typename MatrixT, 
typename VectorT>
 
  301                      void *monitor_data = NULL)
 
  305     VectorT result = rhs;
 
  308     VectorT residual = rhs;
 
  310     VectorT r0star = rhs;
 
  316     CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host;
 
  317     CPU_NumericType beta;
 
  318     CPU_NumericType alpha;
 
  319     CPU_NumericType omega;
 
  321     CPU_NumericType new_ip_rr0star = 0;
 
  322     CPU_NumericType residual_norm = norm_rhs_host;
 
  327     bool restart_flag = 
true;
 
  334         residual = rhs - residual;
 
  338         ip_rr0star *= ip_rr0star;
 
  339         restart_flag = 
false;
 
  347       s = residual - alpha*tmp0;
 
  353       result += alpha * p + omega * s;
 
  354       residual = s - omega * tmp1;
 
  358       if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
 
  363       beta = new_ip_rr0star / ip_rr0star * alpha/omega;
 
  364       ip_rr0star = new_ip_rr0star;
 
  366       if (    (ip_rr0star <= 0 && ip_rr0star >= 0)
 
  367            || (omega <= 0 && omega >= 0)
 
  376       p = residual + beta * p;
 
  380     tag.
error(residual_norm / norm_rhs_host);
 
  398   template<
typename MatrixT, 
typename VectorT, 
typename PreconditionerT>
 
  402                      PreconditionerT 
const & precond,
 
  404                      void *monitor_data = NULL)
 
  408     VectorT result = rhs;
 
  411     VectorT residual = rhs;
 
  412     VectorT r0star = residual;  
 
  417     VectorT p = residual;
 
  421     CPU_NumericType beta;
 
  422     CPU_NumericType alpha;
 
  423     CPU_NumericType omega;
 
  424     CPU_NumericType new_ip_rr0star = 0;
 
  425     CPU_NumericType residual_norm = norm_rhs_host;
 
  430     bool restart_flag = 
true;
 
  437         residual = rhs - residual;
 
  438         precond.apply(residual);
 
  442         ip_rr0star *= ip_rr0star;
 
  443         restart_flag = 
false;
 
  452       s = residual - alpha*tmp0;
 
  459       result += alpha * p + omega * s;
 
  460       residual = s - omega * tmp1;
 
  463       if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data))
 
  470       beta = new_ip_rr0star / ip_rr0star * alpha/omega;
 
  471       ip_rr0star = new_ip_rr0star;
 
  480       p = residual + beta * p;
 
  486     tag.
error(residual_norm / norm_rhs_host);
 
  495 template<
typename MatrixT, 
typename VectorT, 
typename PreconditionerT>
 
  496 VectorT 
solve(MatrixT 
const & matrix, VectorT 
const & rhs, 
bicgstab_tag const & tag, PreconditionerT 
const & precond)
 
  507 template<
typename IndexT, 
typename NumericT, 
typename PreconditionerT>
 
  508 std::vector<NumericT> 
solve(std::vector< std::map<IndexT, NumericT> > 
const & A, std::vector<NumericT> 
const & rhs, 
bicgstab_tag const & tag, PreconditionerT 
const & precond)
 
  518   std::vector<NumericT> result(vcl_result.
size());
 
  529 template<
typename MatrixT, 
typename VectorT>
 
  537 template<
typename VectorT>
 
  545   template<
typename MatrixT, 
typename PreconditionerT>
 
  546   VectorT 
operator()(MatrixT 
const & A, VectorT 
const & b, PreconditionerT 
const & precond)
 const 
  551       mod_rhs = b - mod_rhs;
 
  552       VectorT y = 
detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
 
  553       return init_guess_ + y;
 
  559   template<
typename MatrixT>
 
  560   VectorT 
operator()(MatrixT 
const & A, VectorT 
const & b)
 const 
  580     monitor_callback_ = monitor_fun;
 
  581     user_data_ = user_data;
 
  590   bool          (*monitor_callback_)(VectorT 
const &, 
numeric_type, 
void *);
 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
vcl_size_t max_iterations_before_restart() const 
Returns the maximum number of iterations before a restart. 
T norm_2(std::vector< T, A > const &v1)
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
bicgstab_tag(double tol=1e-8, vcl_size_t max_iters=400, vcl_size_t max_iters_before_restart=200)
The constructor. 
void abs_tolerance(double new_tol)
Sets the absolute tolerance. 
Generic size and resize functionality for different vector and matrix types. 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
viennacl::vector< NumericT > solve_impl(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Overload for the pipelined CG implementation for the ViennaCL sparse matrix types. 
double error() const 
Returns the estimated relative error at the end of the solver run. 
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL. 
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
bicgstab_solver(bicgstab_tag const &tag)
double abs_tolerance() const 
Returns the absolute tolerance. 
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations. 
double tolerance() const 
Returns the relative tolerance. 
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Performs a joint vector update operation needed for an efficient pipelined BiCGStab algorithm...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
Sparse matrix class using the ELLPACK format for storing the nonzeros. 
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like) 
A tag class representing the use of no preconditioner. 
VectorT operator()(MatrixT const &A, VectorT const &b) const 
Sparse matrix class using the sliced ELLPACK with parameters C, . 
Extracts the underlying context from objects. 
void set_monitor(bool(*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor...
bicgstab_tag const & tag() const 
Returns the solver tag containing basic configuration such as tolerances, etc. 
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Implementation of a pipelined stabilized Bi-conjugate gradient solver. 
vcl_size_t max_iterations() const 
Returns the maximum number of iterations. 
Generic clear functionality for different vector and matrix types. 
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
void pipelined_bicgstab_prod(MatrixT const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm. 
void set_initial_guess(VectorT const &x)
Specifies an initial guess for the iterative solver. 
Implementations of specialized routines for the iterative solvers. 
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
viennacl::result_of::cpu_value_type< VectorT >::type numeric_type
Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void error(double e) const 
Sets the estimated relative error at the end of the solver run. 
size_type size() const 
Returns the length of the vector (cf. std::vector) 
vcl_size_t iters() const 
Return the number of solver iterations: 
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm. 
void iters(vcl_size_t i) const 
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
iterator end()
Returns an iterator pointing to the end of the vector (STL like) 
A collection of compile time type deductions. 
VectorT operator()(MatrixT const &A, VectorT const &b, PreconditionerT const &precond) const 
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)