1 #ifndef VIENNACL_GMRES_HPP_ 
    2 #define VIENNACL_GMRES_HPP_ 
   67   void abs_tolerance(
double new_tol) { 
if (new_tol >= 0) abs_tol_ = new_tol; }
 
   76     unsigned int ret = iterations_ / krylov_dim_;
 
   77     if (ret > 0 && (ret * krylov_dim_ == iterations_) )
 
   83   unsigned int iters()
 const { 
return iters_taken_; }
 
   85   void iters(
unsigned int i)
 const { iters_taken_ = i; }
 
   88   double error()
 const { 
return last_error_; }
 
   90   void error(
double e)
 const { last_error_ = e; }
 
   95   unsigned int iterations_;
 
   96   unsigned int krylov_dim_;
 
   99   mutable unsigned int iters_taken_;
 
  100   mutable double last_error_;
 
  106   template<
typename SrcVectorT, 
typename DestVectorT>
 
  113   template<
typename NumericT, 
typename DestVectorT>
 
  118                     src.
begin() + 
static_cast<difference_type
>(
start + len),
 
  119                    dest.begin() + 
static_cast<difference_type
>(
start));
 
  130   template<
typename VectorT, 
typename NumericT>
 
  148       mu = std::sqrt(sigma + input_j*input_j);
 
  150       NumericT hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
 
  152       beta = 
NumericT(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
 
  161   template<
typename VectorT, 
typename NumericT>
 
  165     x -= (beta * hT_in_x) * h;
 
  181   template <
typename MatrixType, 
typename ScalarType>
 
  187                                                void *monitor_data = NULL)
 
  194     std::vector<ScalarType>      host_buffer_R(device_buffer_R.size());
 
  202     std::vector<ScalarType>      host_r_dot_vk_buffer(device_r_dot_vk_buffer.
size());
 
  203     std::vector<ScalarType>      host_values_xi_k(tag.
krylov_dim());
 
  204     std::vector<ScalarType>      host_values_eta_k_buffer(tag.
krylov_dim());
 
  205     std::vector<ScalarType>      host_update_coefficients(tag.
krylov_dim());
 
  213     for (
unsigned int restart_count = 0; restart_count <= tag.
max_restarts(); ++restart_count)
 
  218       if (restart_count > 0)
 
  222         residual = rhs - residual;
 
  241       for (k = 0; k < static_cast<vcl_size_t>(tag.
krylov_dim()); ++k)
 
  252                                                          device_inner_prod_buffer, device_r_dot_vk_buffer,
 
  253                                                          buffer_size_per_vector, k*buffer_size_per_vector);
 
  272                                                                 device_vi_in_vk_buffer,
 
  274                                                                 device_inner_prod_buffer, buffer_size_per_vector);
 
  281                                                          device_inner_prod_buffer, device_r_dot_vk_buffer,
 
  282                                                          buffer_size_per_vector, k*buffer_size_per_vector);
 
  290       viennacl::fast_copy(device_r_dot_vk_buffer.begin(), device_r_dot_vk_buffer.end(), host_r_dot_vk_buffer.begin());
 
  291       for (std::size_t i=0; i<k; ++i)
 
  294         for (std::size_t j=0; j<buffer_size_per_vector; ++j)
 
  295           host_values_xi_k[i] += host_r_dot_vk_buffer[i*buffer_size_per_vector + j];
 
  301       viennacl::fast_copy(device_buffer_R.begin(), device_buffer_R.end(), host_buffer_R.begin());
 
  307       for (std::size_t i=0; i<k; ++i)
 
  309         if (std::fabs(host_buffer_R[i + i*k]) < tag.
tolerance() * host_buffer_R[0])
 
  318       for (std::size_t i=0; i<k; ++i)
 
  323         if (host_values_xi_k[i] >= rho || host_values_xi_k[i] <= -rho)
 
  330         rho *= std::sin( std::acos(host_values_xi_k[i] / rho) );
 
  336       host_values_eta_k_buffer = host_values_xi_k;
 
  338       for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
 
  341         for (
vcl_size_t j=static_cast<vcl_size_t>(i)+1; j<k; ++j)
 
  342           host_values_eta_k_buffer[i] -= host_buffer_R[i + j*full_krylov_dim] * host_values_eta_k_buffer[j];
 
  344         host_values_eta_k_buffer[i] /= host_buffer_R[i + i*full_krylov_dim];
 
  352         host_update_coefficients[i] = rho_0 * host_values_eta_k_buffer[i];
 
  354       viennacl::fast_copy(host_update_coefficients.begin(), host_update_coefficients.end(), device_values_xi_k.begin()); 
 
  358                                                       device_values_xi_k, k);
 
  360       tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
 
  362       if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
 
  370   template<
typename NumericT>
 
  376                                         void *monitor_data = NULL)
 
  383   template<
typename NumericT>
 
  389                                         void *monitor_data = NULL)
 
  397   template<
typename NumericT>
 
  403                                         void *monitor_data = NULL)
 
  411   template<
typename NumericT>
 
  417                                         void *monitor_data = NULL)
 
  424   template<
typename NumericT>
 
  430                                         void *monitor_data = NULL)
 
  449   template<
typename MatrixT, 
typename VectorT, 
typename PreconditionerT>
 
  453                      PreconditionerT 
const & precond,
 
  455                      void *monitor_data = NULL)
 
  461     VectorT result = rhs;
 
  465     if (problem_size < krylov_dim)
 
  466       krylov_dim = problem_size; 
 
  469     VectorT v_k_tilde = rhs;
 
  470     VectorT v_k_tilde_temp = rhs;
 
  472     std::vector< std::vector<CPU_NumericType> > R(krylov_dim, std::vector<CPU_NumericType>(tag.
krylov_dim()));
 
  473     std::vector<CPU_NumericType> projection_rhs(krylov_dim);
 
  475     std::vector<VectorT>          householder_reflectors(krylov_dim, rhs);
 
  476     std::vector<CPU_NumericType>  betas(krylov_dim);
 
  485     for (
unsigned int it = 0; it <= tag.
max_restarts(); ++it)
 
  501         tag.
error(rho_0 / norm_rhs);
 
  509       CPU_NumericType rho = 
static_cast<CPU_NumericType
>(1.0);
 
  516       for (k = 0; k < krylov_dim; ++k)
 
  528           precond.apply(v_k_tilde);
 
  533           v_k_tilde[k-1] = CPU_NumericType(1);
 
  536           for (
int i = static_cast<int>(k)-1; i > -1; --i)
 
  540           precond.apply(v_k_tilde_temp);
 
  541           v_k_tilde = v_k_tilde_temp;
 
  551         CPU_NumericType rho_k_k = 0;
 
  572         projection_rhs[k] = res[k];
 
  574         rho *= std::sin( std::acos(projection_rhs[k] / rho) );
 
  576         if (std::fabs(rho * rho_0 / norm_rhs) < tag.
tolerance())  
 
  578           tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
 
  588       for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
 
  592           projection_rhs[i] -= R[j][i] * projection_rhs[j];     
 
  594         projection_rhs[i] /= R[i][i];
 
  601       res *= projection_rhs[0];
 
  605         for (
unsigned int i = 0; i < k-1; ++i)
 
  606           res[i] += projection_rhs[i+1];
 
  612       for (
int i=static_cast<int>(k)-1; i>=0; --i)
 
  621       tag.
error(std::fabs(rho*rho_0 / norm_rhs));
 
  623       if (monitor && monitor(result, std::fabs(rho*rho_0 / norm_rhs), monitor_data))
 
  635 template<
typename MatrixT, 
typename VectorT, 
typename PreconditionerT>
 
  636 VectorT 
solve(MatrixT 
const & matrix, VectorT 
const & rhs, 
gmres_tag const & tag, PreconditionerT 
const & precond)
 
  646 template<
typename IndexT, 
typename NumericT, 
typename PreconditionerT>
 
  647 std::vector<NumericT> 
solve(std::vector< std::map<IndexT, NumericT> > 
const & A, std::vector<NumericT> 
const & rhs, 
gmres_tag const & tag, PreconditionerT 
const & precond)
 
  657   std::vector<NumericT> result(vcl_result.
size());
 
  669 template<
typename MatrixT, 
typename VectorT>
 
  677 template<
typename VectorT>
 
  685   template<
typename MatrixT, 
typename PreconditionerT>
 
  686   VectorT 
operator()(MatrixT 
const & A, VectorT 
const & b, PreconditionerT 
const & precond)
 const 
  691       mod_rhs = b - mod_rhs;
 
  692       VectorT y = 
detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
 
  693       return init_guess_ + y;
 
  699   template<
typename MatrixT>
 
  700   VectorT 
operator()(MatrixT 
const & A, VectorT 
const & b)
 const 
  720     monitor_callback_ = monitor_fun;
 
  721     user_data_ = user_data;
 
  730   bool       (*monitor_callback_)(VectorT 
const &, 
numeric_type, 
void *);
 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
unsigned int max_restarts() const 
Returns the maximum number of GMRES restarts. 
T norm_2(std::vector< T, A > const &v1)
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::result_of::cpu_value_type< VectorT >::type numeric_type
unsigned int max_iterations() const 
Returns the maximum number of iterations. 
Generic size and resize functionality for different vector and matrix types. 
unsigned int iters() const 
Return the number of solver iterations: 
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void abs_tolerance(double new_tol)
Sets the absolute tolerance. 
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
Computes the first reduction stage for multiple inner products , i=0..k-1. 
VectorT operator()(MatrixT const &A, VectorT const &b, PreconditionerT const &precond) const 
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t k)
Computes x += eta_0 r + sum_{i=1}^{k-1} eta_i v_{i-1}. 
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Computes the second reduction stage for multiple inner products , i=0..k-1, then updates v_k -=  v_i and computes the first reduction stage for ||v_k||. 
viennacl::vector< NumericT > solve_impl(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Overload for the pipelined CG implementation for the ViennaCL sparse matrix types. 
double abs_tolerance() const 
Returns the absolute tolerance. 
void iters(unsigned int i) const 
Set the number of solver iterations (should only be modified by the solver) 
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL. 
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
void error(double e) const 
Sets the estimated relative error at the end of the solver run. 
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations. 
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
void set_initial_guess(VectorT const &x)
Specifies an initial guess for the iterative solver. 
VectorT solve(MatrixT const &matrix, VectorT const &rhs, bicgstab_tag const &tag, PreconditionerT const &precond)
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor. 
VectorT operator()(MatrixT const &A, VectorT const &b) const 
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
Class for representing non-strided subvectors of a bigger vector x. 
Sparse matrix class using the ELLPACK format for storing the nonzeros. 
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like) 
A tag class representing the use of no preconditioner. 
void pipelined_gmres_prod(MatrixType const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined GMRES algorithm. 
base_type::difference_type difference_type
gmres_tag const & tag() const 
Returns the solver tag containing basic configuration such as tolerances, etc. 
Sparse matrix class using the sliced ELLPACK with parameters C, . 
result_of::size_type< T >::type start(T const &obj)
Extracts the underlying context from objects. 
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm. 
void gmres_setup_householder_vector(VectorT const &input_vec, VectorT &hh_vec, NumericT &beta, NumericT &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond, bool(*monitor)(viennacl::vector< NumericT > const &, NumericT, void *)=NULL, void *monitor_data=NULL)
Implementation of a pipelined stabilized Bi-conjugate gradient solver. 
gmres_solver(gmres_tag const &tag)
void gmres_copy_helper(SrcVectorT const &src, DestVectorT &dest, vcl_size_t len, vcl_size_t start=0)
Generic clear functionality for different vector and matrix types. 
void set_monitor(bool(*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor...
double tolerance() const 
Returns the relative tolerance. 
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors. 
Implementations of specialized routines for the iterative solvers. 
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object. 
Helper meta function for retrieving the main RAM-based value type. Particularly important to obtain T...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const 
Returns the length of the vector (cf. std::vector) 
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded. 
double error() const 
Returns the estimated relative error at the end of the solver run. 
size_type internal_size() const 
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
A collection of compile time type deductions. 
unsigned int krylov_dim() const 
Returns the maximum dimension of the Krylov space before restart. 
void gmres_householder_reflect(VectorT &x, VectorT const &h, NumericT beta)
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)