1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_ 
    2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_ 
   39 #ifdef VIENNACL_WITH_OPENMP 
   44 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE 
   45   #define VIENNACL_OPENMP_VECTOR_MIN_SIZE  5000 
   56   template<
typename NumericT>
 
   /** @brief Sign-flip overload for unsigned long: returns the argument unchanged.
    *
    * NOTE(review): presumably a no-op because an unsigned value carries no sign
    * to flip; confirm against the generic flip_sign template.
    *
    * @param val  The value whose sign would be flipped.
    * @return     val, unmodified.
    */
   inline unsigned long flip_sign(unsigned long val) { return val; }
 
   /** @brief Sign-flip overload for unsigned int: returns the argument unchanged.
    *
    * NOTE(review): presumably a no-op because an unsigned value carries no sign
    * to flip; confirm against the generic flip_sign template.
    *
    * @param val  The value whose sign would be flipped.
    * @return     val, unmodified.
    */
   inline unsigned int flip_sign(unsigned int val) { return val; }
 
   /** @brief Sign-flip overload for unsigned short: returns the argument unchanged.
    *
    * NOTE(review): presumably a no-op because an unsigned value carries no sign
    * to flip; confirm against the generic flip_sign template.
    *
    * @param val  The value whose sign would be flipped.
    * @return     val, unmodified.
    */
   inline unsigned short flip_sign(unsigned short val) { return val; }
 
   /** @brief Sign-flip overload for unsigned char: returns the argument unchanged.
    *
    * NOTE(review): presumably a no-op because an unsigned value carries no sign
    * to flip; confirm against the generic flip_sign template.
    *
    * @param val  The value whose sign would be flipped.
    * @return     val, unmodified.
    */
   inline unsigned char flip_sign(unsigned char val) { return val; }
 
   67 template<
typename DestNumericT, 
typename SrcNumericT>
 
   70   DestNumericT      * data_dest = detail::extract_raw_pointer<DestNumericT>(dest);
 
   71   SrcNumericT 
const * data_src  = detail::extract_raw_pointer<SrcNumericT>(src);
 
   80 #ifdef VIENNACL_WITH_OPENMP 
   81   #pragma omp parallel for if (size_dest > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
   83   for (
long i = 0; i < static_cast<long>(size_dest); ++i)
 
   84     data_dest[static_cast<vcl_size_t>(i)*inc_dest+start_dest] = 
static_cast<DestNumericT
>(data_src[
static_cast<vcl_size_t>(i)*inc_src+start_src]);
 
   87 template<
typename NumericT, 
typename ScalarT1>
 
   93   value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
   94   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
   96   value_type data_alpha = alpha;
 
  107   if (reciprocal_alpha)
 
  109 #ifdef VIENNACL_WITH_OPENMP 
  110     #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  112     for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  113       data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
 
  117 #ifdef VIENNACL_WITH_OPENMP 
  118     #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  120     for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  121       data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
 
  126 template<
typename NumericT, 
typename ScalarT1, 
typename ScalarT2>
 
  133   value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  134   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
  135   value_type 
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
 
  137   value_type data_alpha = alpha;
 
  141   value_type data_beta = beta;
 
  155   if (reciprocal_alpha)
 
  159 #ifdef VIENNACL_WITH_OPENMP 
  160       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  162       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  163         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
 
  167 #ifdef VIENNACL_WITH_OPENMP 
  168       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  170       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  171         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
 
  178 #ifdef VIENNACL_WITH_OPENMP 
  179       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  181       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  182         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
 
  186 #ifdef VIENNACL_WITH_OPENMP 
  187       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  189       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  190         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
 
  196 template<
typename NumericT, 
typename ScalarT1, 
typename ScalarT2>
 
  203   value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  204   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
  205   value_type 
const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
 
  207   value_type data_alpha = alpha;
 
  211   value_type data_beta = beta;
 
  225   if (reciprocal_alpha)
 
  229 #ifdef VIENNACL_WITH_OPENMP 
  230       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  232       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  233         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
 
  237 #ifdef VIENNACL_WITH_OPENMP 
  238       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  240       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  241         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
 
  248 #ifdef VIENNACL_WITH_OPENMP 
  249       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  251       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  252         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
 
  256 #ifdef VIENNACL_WITH_OPENMP 
  257       #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  259       for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  260         data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] += data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
 
  274 template<
typename NumericT>
 
  279   value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  286   value_type data_alpha = 
static_cast<value_type
>(alpha);
 
  288 #ifdef VIENNACL_WITH_OPENMP 
  289   #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  291   for (
long i = 0; i < static_cast<long>(loop_bound); ++i)
 
  292     data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1] = data_alpha;
 
  301 template<
typename NumericT>
 
  306   value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  307   value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
  316 #ifdef VIENNACL_WITH_OPENMP 
  317   #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  319   for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  321     value_type temp = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
 
  322     data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1];
 
  323     data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = temp;
 
  335 template<
typename NumericT, 
typename OpT>
 
  342   value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  343   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
 
  344   value_type 
const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
 
  356 #ifdef VIENNACL_WITH_OPENMP 
  357   #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  359   for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  360     OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
 
  368 template<
typename NumericT, 
typename OpT>
 
  375   value_type       * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  376   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
 
  385 #ifdef VIENNACL_WITH_OPENMP 
  386   #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  388   for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
  389     OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1], data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2]);
 
  405 #define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \ 
  406   inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \ 
  407                                        RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \ 
  408     TEMPSCALART temp = 0; 
  410 #define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \ 
  411     for (long i = 0; i < static_cast<long>(size1); ++i) \ 
  412       temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \ 
  413     return static_cast<RESULTSCALART>(temp); \ 
  418 #ifdef VIENNACL_WITH_OPENMP 
  419   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  424 #ifdef VIENNACL_WITH_OPENMP 
  425   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  432 #ifdef VIENNACL_WITH_OPENMP 
  433   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  438 #ifdef VIENNACL_WITH_OPENMP 
  439   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  446 #ifdef VIENNACL_WITH_OPENMP 
  447   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  452 #ifdef VIENNACL_WITH_OPENMP 
  453   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  460 #ifdef VIENNACL_WITH_OPENMP 
  461   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  466 #ifdef VIENNACL_WITH_OPENMP 
  467   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  474 #ifdef VIENNACL_WITH_OPENMP 
  475   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  481 #ifdef VIENNACL_WITH_OPENMP 
  482   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  486 #undef VIENNACL_INNER_PROD_IMPL_1 
  487 #undef VIENNACL_INNER_PROD_IMPL_2 
  496 template<
typename NumericT, 
typename ScalarT>
 
  503   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  504   value_type 
const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
  514                                    data_vec2, start2, inc2);  
 
  517 template<
typename NumericT>
 
  524   value_type 
const * data_x = detail::extract_raw_pointer<value_type>(x);
 
  530   std::vector<value_type> temp(vec_tuple.
const_size());
 
  531   std::vector<value_type const *> data_y(vec_tuple.
const_size());
 
  532   std::vector<vcl_size_t> start_y(vec_tuple.
const_size());
 
  533   std::vector<vcl_size_t> stride_y(vec_tuple.
const_size());
 
  537     data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.
const_at(j));
 
  545     value_type entry_x = data_x[i*inc_x+start_x];
 
  547       temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
 
  558 #define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \ 
  559   inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \ 
  560     TEMPSCALART temp = 0; 
  562 #define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \ 
  563     for (long i = 0; i < static_cast<long>(size1); ++i) \ 
  564       temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \ 
  565     return static_cast<RESULTSCALART>(temp); \ 
  570 #ifdef VIENNACL_WITH_OPENMP 
  571   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  576 #ifdef VIENNACL_WITH_OPENMP 
  577   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  583 #ifdef VIENNACL_WITH_OPENMP 
  584   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  589 #ifdef VIENNACL_WITH_OPENMP 
  590   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  597 #ifdef VIENNACL_WITH_OPENMP 
  598   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  603 #ifdef VIENNACL_WITH_OPENMP 
  604   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  611 #ifdef VIENNACL_WITH_OPENMP 
  612   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  617 #ifdef VIENNACL_WITH_OPENMP 
  618   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  625 #ifdef VIENNACL_WITH_OPENMP 
  626   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  632 #ifdef VIENNACL_WITH_OPENMP 
  633   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  637 #undef VIENNACL_NORM_1_IMPL_1 
  638 #undef VIENNACL_NORM_1_IMPL_2 
  647 template<
typename NumericT, 
typename ScalarT>
 
  653   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  667 #define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \ 
  668   inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \ 
  669     TEMPSCALART temp = 0; 
  671 #define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \ 
  672     for (long i = 0; i < static_cast<long>(size1); ++i) { \ 
  673       RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \ 
  674       temp += static_cast<TEMPSCALART>(data * data); \ 
  676     return static_cast<RESULTSCALART>(temp); \ 
  681 #ifdef VIENNACL_WITH_OPENMP 
  682   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  687 #ifdef VIENNACL_WITH_OPENMP 
  688   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  695 #ifdef VIENNACL_WITH_OPENMP 
  696   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  701 #ifdef VIENNACL_WITH_OPENMP 
  702   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  709 #ifdef VIENNACL_WITH_OPENMP 
  710   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  715 #ifdef VIENNACL_WITH_OPENMP 
  716   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  723 #ifdef VIENNACL_WITH_OPENMP 
  724   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  729 #ifdef VIENNACL_WITH_OPENMP 
  730   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  737 #ifdef VIENNACL_WITH_OPENMP 
  738   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  744 #ifdef VIENNACL_WITH_OPENMP 
  745   #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  749 #undef VIENNACL_NORM_2_IMPL_1 
  750 #undef VIENNACL_NORM_2_IMPL_2 
  760 template<
typename NumericT, 
typename ScalarT>
 
  766   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  780 template<
typename NumericT, 
typename ScalarT>
 
  786   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  794   #ifdef VIENNACL_WITH_OPENMP 
  796       thread_count = omp_get_max_threads();
 
  799   std::vector<value_type> temp(thread_count);
 
  801 #ifdef VIENNACL_WITH_OPENMP 
  802   #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  806 #ifdef VIENNACL_WITH_OPENMP 
  807     id = omp_get_thread_num();
 
  810     vcl_size_t begin = (size1 * id) / thread_count;
 
  811     vcl_size_t end   = (size1 * (
id + 1)) / thread_count;
 
  815       temp[
id] = std::max<value_type>(temp[
id], static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))));  
 
  818     temp[0] = std::max<value_type>( temp[0], temp[i]);
 
  830 template<
typename NumericT>
 
  835   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  842 #ifdef VIENNACL_WITH_OPENMP 
  844       thread_count = omp_get_max_threads();
 
  847   std::vector<value_type> temp(thread_count);
 
  848   std::vector<vcl_size_t> index(thread_count);
 
  850 #ifdef VIENNACL_WITH_OPENMP 
  851   #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  855 #ifdef VIENNACL_WITH_OPENMP 
  856     id = omp_get_thread_num();
 
  858     vcl_size_t begin = (size1 * id) / thread_count;
 
  859     vcl_size_t end   = (size1 * (
id + 1)) / thread_count;
 
  866       data = 
static_cast<value_type
>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])));  
 
  876     if (temp[i] > temp[0])
 
  890 template<
typename NumericT, 
typename ScalarT>
 
  896   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  904 #ifdef VIENNACL_WITH_OPENMP 
  906       thread_count = omp_get_max_threads();
 
  909   std::vector<value_type> temp(thread_count);
 
  911 #ifdef VIENNACL_WITH_OPENMP 
  912   #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  916 #ifdef VIENNACL_WITH_OPENMP 
  917     id = omp_get_thread_num();
 
  919     vcl_size_t begin = (size1 * id) / thread_count;
 
  920     vcl_size_t end   = (size1 * (
id + 1)) / thread_count;
 
  921     temp[id]         = data_vec1[
start1];
 
  925       value_type v = data_vec1[i*inc1+
start1];
 
  926       temp[id] = std::max<value_type>(temp[id],v);
 
  930     temp[0] = std::max<value_type>( temp[0], temp[i]);
 
  939 template<
typename NumericT, 
typename ScalarT>
 
  945   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
  953 #ifdef VIENNACL_WITH_OPENMP 
  955       thread_count = omp_get_max_threads();
 
  958   std::vector<value_type> temp(thread_count);
 
  960 #ifdef VIENNACL_WITH_OPENMP 
  961   #pragma omp parallel if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
  965 #ifdef VIENNACL_WITH_OPENMP 
  966     id = omp_get_thread_num();
 
  968     vcl_size_t begin = (size1 * id) / thread_count;
 
  969     vcl_size_t end   = (size1 * (
id + 1)) / thread_count;
 
  970     temp[id]         = data_vec1[
start1];
 
  974       value_type v = data_vec1[i*inc1+
start1];
 
  975       temp[id] = std::min<value_type>(temp[id],v);
 
  979     temp[0] = std::min<value_type>( temp[0], temp[i]);
 
  988 template<
typename NumericT, 
typename ScalarT>
 
  994   value_type 
const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
 1000   value_type temp = 0;
 
 1001 #ifdef VIENNACL_WITH_OPENMP 
 1002   #pragma omp parallel for reduction(+:temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
 1004   for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
 1005     temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+
start1];
 
 1019 template<
typename NumericT>
 
 1026   value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
 
 1027   value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
 
 1036   value_type data_alpha = alpha;
 
 1037   value_type data_beta  = beta;
 
 1039 #ifdef VIENNACL_WITH_OPENMP 
 1040   #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 
 1042   for (
long i = 0; i < static_cast<long>(
size1); ++i)
 
 1044     value_type temp1 = data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1];
 
 1045     value_type temp2 = data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2];
 
 1047     data_vec1[
static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
 
 1048     data_vec2[
static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
 
 1055   template<
typename NumericT>
 
 1060     NumericT const * data_vec1 = detail::extract_raw_pointer<NumericT>(vec1);
 
 1061     NumericT       * data_vec2 = detail::extract_raw_pointer<NumericT>(vec2);
 
 1072 #ifdef VIENNACL_WITH_OPENMP 
 1075       std::vector<NumericT> thread_results(omp_get_max_threads());
 
 1078       #pragma omp parallel 
 1080         vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
 
 1081         vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
 
 1082         vcl_size_t thread_stop  = std::min<vcl_size_t>(thread_start + work_per_thread, 
size1);
 
 1085         for(
vcl_size_t i = thread_start; i < thread_stop; i++)
 
 1086           thread_sum += data_vec1[i * inc1 + start1];
 
 1088         thread_results[omp_get_thread_num()] = thread_sum;
 
 1093       for (
vcl_size_t i=0; i<thread_results.size(); ++i)
 
 1096         thread_results[i] = current_offset;
 
 1097         current_offset += tmp;
 
 1101       #pragma omp parallel 
 1103         vcl_size_t work_per_thread = (size1 - 1) / thread_results.size() + 1;
 
 1104         vcl_size_t thread_start = work_per_thread * omp_get_thread_num();
 
 1105         vcl_size_t thread_stop  = std::min<vcl_size_t>(thread_start + work_per_thread, 
size1);
 
 1107         NumericT thread_sum = thread_results[omp_get_thread_num()];
 
 1110           for(
vcl_size_t i = thread_start; i < thread_stop; i++)
 
 1112             thread_sum += data_vec1[i * inc1 + 
start1];
 
 1113             data_vec2[i * inc2 + 
start2] = thread_sum;
 
 1118           for(
vcl_size_t i = thread_start; i < thread_stop; i++)
 
 1121             data_vec2[i * inc2 + 
start2] = thread_sum;
 
 1134           sum += data_vec1[i * inc1 + 
start1];
 
 1160 template<
typename NumericT>
 
 1175 template<
typename NumericT>
 
vcl_size_t const_size() const 
#define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART)
#define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART)
#define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART)
void norm_2_impl(vector_base< T > const &vec, scalar< T > &result)
Computes the l^2-norm of a vector - implementation using OpenCL summation at second step...
void inclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan on the host using OpenMP. 
viennacl::scalar_expression< const viennacl::vector_base< NumericT >, const viennacl::vector_base< NumericT >, viennacl::op_sum > sum(viennacl::vector_base< NumericT > const &x)
User interface function for computing the sum of all elements of a vector. 
void norm_1_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^1-norm of a vector. 
Generic size and resize functionality for different vector and matrix types. 
void norm_inf_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the supremum-norm of a vector. 
void av(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void sum_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the sum of all elements from the vector. 
#define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART)
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
Worker class for decomposing expression templates. 
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL. 
result_of::size_type< T >::type start1(T const &obj)
Determines row and column increments for matrices and matrix proxies. 
An expression template class that represents a binary operation that yields a vector. 
void vector_assign(vector_base< NumericT > &vec1, const NumericT &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice) 
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.) 
result_of::size_type< T >::type start2(T const &obj)
void norm_2_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the l^2-norm of a vector - implementation. 
#define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART)
vcl_size_t index_norm_inf(vector_base< NumericT > const &vec1)
Computes the index of the first entry that is equal to the supremum-norm in modulus. 
Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from vie...
void min_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the minimum of a vector. 
void vector_swap(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
Swaps the contents of two vectors, data is copied. 
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
NumericT flip_sign(NumericT val)
void avbv(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Common base class for dense vectors, vector ranges, and vector slices. 
Common routines for single-threaded or OpenMP-enabled execution on CPU. 
void exclusive_scan(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan on the host using OpenMP. 
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc. 
void inner_prod_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result)
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1...
void vector_scan_impl(vector_base< NumericT > const &vec1, vector_base< NumericT > &vec2, bool is_inclusive)
Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend. 
void max_impl(vector_base< NumericT > const &vec1, ScalarT &result)
Computes the maximum of a vector. 
#define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART)
VectorType const & const_at(vcl_size_t i) const 
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size() const 
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
void plane_rotation(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta)
Computes a plane rotation of two vectors. 
void inner_prod_impl(vector_base< T > const &x, vector_tuple< T > const &y_tuple, vector_base< T > &result)
Computes the inner products <x, y_1>, <x, y_2>, ..., <x, y_N> and writes the result to a (sub-)vector...
Implementation of the ViennaCL scalar class. 
void avbv_v(vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void norm_1_impl(viennacl::vector_expression< LHS, RHS, OP > const &vec, S2 &result)
Computes the l^1-norm of a vector - interface for a vector expression. Creates a temporary. 
#define VIENNACL_OPENMP_VECTOR_MIN_SIZE
Simple enable-if variant that uses the SFINAE pattern.