1 #ifndef VIENNACL_LINALG_QR_METHOD_HPP_ 
    2 #define VIENNACL_LINALG_QR_METHOD_HPP_ 
   28 #include <boost/numeric/ublas/vector.hpp> 
   29 #include <boost/numeric/ublas/matrix.hpp> 
   41 template <
typename SCALARTYPE>
 
   49   (void)A; (void)n; (void)last_n; (void)q; (void)p;
 
   50 #ifdef VIENNACL_WITH_OPENCL 
   60                                       static_cast<cl_uint>(n),
 
   61                                       static_cast<cl_uint
>(last_n),
 
   73                                       static_cast<cl_uint>(n),
 
   74                                       static_cast<cl_uint
>(last_n),
 
   81 template <
typename SCALARTYPE, 
typename VectorType>
 
   83                         const VectorType& buf,
 
   91   (void)A; (void)buf; (void)buf_vcl; (void)m; (void)n; (void)last_n;
 
   92 #ifdef VIENNACL_WITH_OPENCL 
  105                                       static_cast<cl_uint>(m),
 
  106                                       static_cast<cl_uint
>(n),
 
  107                                       static_cast<cl_uint>(last_n)
 
  118                                       static_cast<cl_uint>(m),
 
  119                                       static_cast<cl_uint
>(n),
 
  120                                       static_cast<cl_uint>(last_n)
 
  127     template<
typename SCALARTYPE, 
typename MatrixT>
 
  135         for (
int i = 0; i < last_n; i++)
 
  137             SCALARTYPE v_in = A(i, n);
 
  138             SCALARTYPE z = A(i, n - 1);
 
  139             A(i, n - 1) = q * z + p * v_in;
 
  140             A(i, n) = q * v_in - p * z;
 
  144     template<
typename SCALARTYPE, 
typename MatrixT>
 
  146                             const std::vector<SCALARTYPE>& buf,
 
  153         for (
int i = 0; i < last_i; i++)
 
  155             int start_k = is_triangular?
std::max(i + 1, m):m;
 
  157             SCALARTYPE* a_row = A.row(i);
 
  159             SCALARTYPE a_ik   = a_row[start_k];
 
  160             SCALARTYPE a_ik_1 = 0;
 
  161             SCALARTYPE a_ik_2 = 0;
 
  164                 a_ik_1 = a_row[start_k + 1];
 
  166             for (
int k = start_k; k < n; k++)
 
  168                 bool notlast = (k != n - 1);
 
  170                 SCALARTYPE p = buf[5 * 
static_cast<vcl_size_t>(k)] * a_ik + buf[5 * static_cast<vcl_size_t>(k) + 1] * a_ik_1;
 
  174                     a_ik_2 = a_row[k + 2];
 
  175                     p = p + buf[5 * 
static_cast<vcl_size_t>(k) + 2] * a_ik_2;
 
  176                     a_ik_2 = a_ik_2 - p * buf[5 * 
static_cast<vcl_size_t>(k) + 4];
 
  180                 a_ik_1 = a_ik_1 - p * buf[5 * 
static_cast<vcl_size_t>(k) + 3];
 
  192     template<
typename SCALARTYPE>
 
  203             data.resize(internal_size * internal_size);
 
  208             return data[
static_cast<vcl_size_t>(i) * internal_size_ + static_cast<vcl_size_t>(j)];
 
  235     template <
typename SCALARTYPE, 
typename VectorType>
 
  243         int nn = 
static_cast<int>(vcl_H.
size1());
 
  247         std::vector<SCALARTYPE>  buf(5 * 
vcl_size_t(nn));
 
  256         SCALARTYPE eps = 2 * 
static_cast<SCALARTYPE
>(
EPS);
 
  257         SCALARTYPE exshift = 0;
 
  268         SCALARTYPE out1, out2;
 
  272         for (
int i = 0; i < nn; i++)
 
  274             for (
int j = 
std::max(i - 1, 0); j < nn; j++)
 
  275                 norm = norm + std::fabs(H(i, j));
 
  286                 s = std::fabs(H(l - 1, l - 1)) + std::fabs(H(l, l));
 
  289                 if (std::fabs(H(l, l - 1)) < eps * s)
 
  299                 H(n, n) = H(n, n) + exshift;
 
  308                 w = H(n, n - 1) * H(n - 1, n);
 
  309                 p = (H(n - 1, n - 1) - H(n, n)) / 2;
 
  311                 z = 
static_cast<SCALARTYPE
>(std::sqrt(std::fabs(q)));
 
  312                 H(n, n) = H(n, n) + exshift;
 
  313                 H(n - 1, n - 1) = H(n - 1, n - 1) + exshift;
 
  319                     z = (p >= 0) ? (p + z) : (p - z);
 
  322                     if (z <= 0 && z >= 0) 
 
  327                     s = std::fabs(x) + std::fabs(z);
 
  330                     r = 
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q));
 
  335                     for (
int j = n - 1; j < nn; j++)
 
  337                         SCALARTYPE h_nj = H(n, j);
 
  339                         H(n - 1, j) = q * z + p * h_nj;
 
  340                         H(n, j) = q * h_nj - p * z;
 
  369                     w = H(n, n - 1) * H(n - 1, n);
 
  376                     for (
int i = 0; i <= n; i++)
 
  379                     s = std::fabs(H(n, n - 1)) + std::fabs(H(n - 1, n - 2));
 
  380                     x = y = SCALARTYPE(0.75) * s;
 
  381                     w = SCALARTYPE(-0.4375) * s * s;
 
  391                         s = 
static_cast<SCALARTYPE
>(std::sqrt(s));
 
  393                         s = x - w / ((y - x) / 2 + s);
 
  394                         for (
int i = 0; i <= n; i++)
 
  397                         x = y = w = SCALARTYPE(0.964);
 
  407                     SCALARTYPE h_m1_m1 = H(m + 1, m + 1);
 
  411                     p = (r * s - w) / H(m + 1, m) + H(m, m + 1);
 
  412                     q = h_m1_m1 - z - r - s;
 
  414                     s = std::fabs(p) + std::fabs(q) + std::fabs(r);
 
  420                     if (std::fabs(H(m, m - 1)) * (std::fabs(q) + std::fabs(r)) < eps * (std::fabs(p) * (std::fabs(H(m - 1, m - 1)) + std::fabs(z) + std::fabs(h_m1_m1))))
 
  425                 for (
int i = m + 2; i <= n; i++)
 
  433                 for (
int k = m; k < n; k++)
 
  435                     bool notlast = (k != n - 1);
 
  440                         r = (notlast ? H(k + 2, k - 1) : 0);
 
  441                         x = std::fabs(p) + std::fabs(q) + std::fabs(r);
 
  450                     if (x <= 0 && x >= 0) 
break;  
 
  452                     s = 
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q + r * r));
 
  458                             H(k, k - 1) = -s * x;
 
  461                                 H(k, k - 1) = -H(k, k - 1);
 
  477                         SCALARTYPE* a_row_k = H.
row(k);
 
  478                         SCALARTYPE* a_row_k_1 = H.row(k + 1);
 
  479                         SCALARTYPE* a_row_k_2 = H.row(k + 2);
 
  481                         for (
int j = k; j < nn; j++)
 
  483                             SCALARTYPE h_kj = a_row_k[j];
 
  484                             SCALARTYPE h_k1_j = a_row_k_1[j];
 
  486                             p = h_kj + q * h_k1_j;
 
  489                                 SCALARTYPE h_k2_j = a_row_k_2[j];
 
  491                                 a_row_k_2[j] = h_k2_j - p * z;
 
  494                             a_row_k[j] = h_kj - p * x;
 
  495                             a_row_k_1[j] = h_k1_j - p * y;
 
  502                         for (
int i = k; i < 
std::min(nn, k + 4); i++)
 
  504                             p = x * H(i, k) + y * H(i, k + 1);
 
  507                                 p = p + z * H(i, k + 2);
 
  508                                 H(i, k + 2) = H(i, k + 2) - p * r;
 
  511                             H(i, k) = H(i, k) - p;
 
  512                             H(i, k + 1) = H(i, k + 1) - p * q;
 
  528                 update_float_QR_column<SCALARTYPE>(H, buf, m, n, n, 
true);
 
  541         for (n = nn - 1; n >= 0; n--)
 
  547             if (q <= 0 && q >= 0)
 
  551                 for (
int i = n - 1; i >= 0; i--)
 
  555                     for (
int j = l; j <= n; j++)
 
  556                         r = r + H(i, j) * H(j, n);
 
  568                             H(i, n) = (w > 0 || w < 0) ? (-r / w) : (-r / (eps * norm));
 
  576                             t = (x * s - z * r) / q;
 
  578                             H(i + 1, n) = (std::fabs(x) > std::fabs(z)) ? ((-r - w * t) / x) : ((-s - y * t) / z);
 
  582                         t = std::fabs(H(i, n));
 
  583                         if ((eps * t) * t > 1)
 
  584                             for (
int j = i; j <= n; j++)
 
  595                 if (std::fabs(H(n, n - 1)) > std::fabs(H(n - 1, n)))
 
  597                     H(n - 1, n - 1) = q / H(n, n - 1);
 
  598                     H(n - 1, n) = -(H(n, n) - p) / H(n, n - 1);
 
  602                     cdiv<SCALARTYPE>(0, -H(n - 1, n), H(n - 1, n - 1) - p, q, out1, out2);
 
  604                     H(n - 1, n - 1) = out1;
 
  610                 for (
int i = n - 2; i >= 0; i--)
 
  612                     SCALARTYPE ra, sa, vr, vi;
 
  615                     for (
int j = l; j <= n; j++)
 
  617                         SCALARTYPE h_ij = H(i, j);
 
  618                         ra = ra + h_ij * H(j, n - 1);
 
  619                         sa = sa + h_ij * H(j, n);
 
  635                             cdiv<SCALARTYPE>(-ra, -sa, w, q, out1, out2);
 
  646                             if ( (vr <= 0 && vr >= 0) && (vi <= 0 && vi >= 0) )
 
  647                                 vr = eps * norm * (std::fabs(w) + std::fabs(q) + std::fabs(x) + std::fabs(y) + std::fabs(z));
 
  649                             cdiv<SCALARTYPE>(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi, out1, out2);
 
  655                             if (std::fabs(x) > (std::fabs(z) + std::fabs(q)))
 
  657                                 H(i + 1, n - 1) = (-ra - w * H(i, n - 1) + q * H(i, n)) / x;
 
  658                                 H(i + 1, n) = (-sa - w * H(i, n) - q * H(i, n - 1)) / x;
 
  662                                 cdiv<SCALARTYPE>(-r - y * H(i, n - 1), -s - y * H(i, n), z, q, out1, out2);
 
  664                                 H(i + 1, n - 1) = out1;
 
  670                         t = 
std::max(std::fabs(H(i, n - 1)), std::fabs(H(i, n)));
 
  671                         if ((eps * t) * t > 1)
 
  673                             for (
int j = i; j <= n; j++)
 
  693     template <
typename SCALARTYPE>
 
  701         if(start + 2 >= A_size1)
 
  712     template <
typename SCALARTYPE>
 
  727     template <
typename SCALARTYPE>
 
  730                    std::vector<SCALARTYPE> & D,
 
  731                    std::vector<SCALARTYPE> & E,
 
  732                    bool is_symmetric = 
true)
 
  735         assert(A.
size1() == A.
size2() && bool(
"Input matrix must be square for QR method!"));
 
  772         boost::numeric::ublas::matrix<SCALARTYPE> eigen_values(A.
size1(), A.
size1());
 
  773         eigen_values.clear();
 
  777             if(std::fabs(E[i]) < 
EPS)
 
  779                 eigen_values(i, i) = D[i];
 
  783                 eigen_values(i, i) = D[i];
 
  784                 eigen_values(i, i + 1) = E[i];
 
  785                 eigen_values(i + 1, i) = -E[i];
 
  786                 eigen_values(i + 1, i + 1) = D[i];
 
  791         copy(eigen_values, A);
 
  795 template <
typename SCALARTYPE>
 
  798                    std::vector<SCALARTYPE>& D,
 
  799                    std::vector<SCALARTYPE>& E
 
  805 template <
typename SCALARTYPE>
 
  808                    std::vector<SCALARTYPE>& D
 
  811     std::vector<SCALARTYPE> E(A.
size1());
 
  816 template <
typename SCALARTYPE>
 
  822     std::vector<SCALARTYPE> std_D(D.
size());
 
  823     std::vector<SCALARTYPE> E(A.
size1());
 
void final_iter_update(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Internal helper class representing a row-major dense matrix used for the QR method for the purpose of...
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL. 
Implementation of the dense matrix class. 
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
void tql2(matrix_base< SCALARTYPE, F > &Q, VectorType &d, VectorType &e)
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void final_iter_update_gpu(matrix_base< SCALARTYPE > &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
const std::string SVD_UPDATE_QR_COLUMN_KERNEL
Implementation of the tql2-algorithm for eigenvalue computations. 
void transpose(matrix_base< SCALARTYPE > &A)
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector. 
T max(const T &lhs, const T &rhs)
Maximum. 
void prepare_householder_vector(matrix_base< SCALARTYPE > &A, vector_base< SCALARTYPE > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
void update_float_QR_column_gpu(matrix_base< SCALARTYPE > &A, const VectorType &buf, viennacl::vector< SCALARTYPE > &buf_vcl, int m, int n, int last_n, bool)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Common routines used for the QR method and SVD. Experimental. 
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context. 
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
This function stores the diagonal and the superdiagonal of a matrix in two vectors. 
result_of::size_type< T >::type start(T const &obj)
void qr_method_nsm(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E)
Common base class for dense vectors, vector ranges, and vector slices. 
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
size_type size2() const
Returns the number of columns. 
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void tridiagonal_reduction(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q)
size_type size1() const
Returns the number of rows. 
std::vector< SCALARTYPE > data
void qr_method(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D, std::vector< SCALARTYPE > &E, bool is_symmetric=true)
SCALARTYPE & operator()(int i, int j)
void update_float_QR_column(MatrixT &A, const std::vector< SCALARTYPE > &buf, int m, int n, int last_i, bool is_triangular)
FastMatrix(vcl_size_t sz, vcl_size_t internal_size)
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue. 
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const 
Returns the length of the vector (cf. std::vector) 
const std::string SVD_FINAL_ITER_UPDATE_KERNEL
void hqr2(viennacl::matrix< SCALARTYPE > &vcl_H, viennacl::matrix< SCALARTYPE > &V, VectorType &d, VectorType &e)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
size_type internal_size1() const 
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
T min(const T &lhs, const T &rhs)
Minimum. 
bool householder_twoside(matrix_base< SCALARTYPE > &A, matrix_base< SCALARTYPE > &Q, vector_base< SCALARTYPE > &D, vcl_size_t start)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
void qr_method_sym(viennacl::matrix< SCALARTYPE > &A, viennacl::matrix< SCALARTYPE > &Q, std::vector< SCALARTYPE > &D)