1 #ifndef VIENNACL_LINALG_CUDA_BISECT_BISECT_UTIL_HPP_ 
    2 #define VIENNACL_LINALG_CUDA_BISECT_BISECT_UTIL_HPP_ 
   56     frexp((
float)n, &exp);
 
   57     return (1 << (exp - 1));
 
   76     frexp((
float)n, &exp);
 
   86 template<
typename NumericT>
 
   96         mid = left + (right - left) * 0.5f;
 
  100         mid = (left + right) * 0.5f;
 
  121 template<
class S, 
class T, 
class NumericT>
 
  126               T *s_left_count, T *s_right_count,
 
  128               S left_count, S right_count,
 
  131     s_left_count[addr] = left_count;
 
  132     s_right_count[addr] = right_count;
 
  136     NumericT t1 = 
max(abs(left), abs(right)) * precision;
 
  144         s_left[addr] = lambda;
 
  145         s_right[addr] = lambda;
 
  152         s_right[addr] = right;
 
  174 template<
typename NumericT>
 
  179                            const unsigned int tid,
 
  180                            const unsigned int num_intervals_active,
 
  182                            unsigned int converged
 
  187     unsigned int count = 0;
 
  194         s_d[threadIdx.x] = *(g_d + threadIdx.x);
 
  195         s_s[threadIdx.x] = *(g_s + threadIdx.x - 1);
 
  201     if ((tid < num_intervals_active) && (0 == converged))
 
  206         for (
unsigned int k = 0; k < n; ++k)
 
  208             delta = s_d[k] - x - (s_s[k] * s_s[k]) / delta;
 
  209             count += (delta < 0) ? 1 : 0;
 
  234 template<
typename NumericT>
 
  239                                 const unsigned int tid,
 
  240                                 const unsigned int num_intervals_active,
 
  242                                 unsigned int converged
 
  246     unsigned int count = 0;
 
  248     unsigned int rem = n;
 
  251     for (
unsigned int i = 0; i < n; i += blockDim.x)
 
  257         if ((i + threadIdx.x) < n)
 
  260             s_d[threadIdx.x] = *(g_d + i + threadIdx.x);
 
  261             s_s[threadIdx.x] = *(g_s + i + threadIdx.x - 1);
 
  267         if (tid < num_intervals_active)
 
  272             for (
unsigned int k = 0; k < 
min(rem,blockDim.x); ++k)
 
  274                 delta = s_d[k] - x - (s_s[k] * s_s[k]) / delta;
 
  276                 count += (delta < 0) ? 1 : 0;
 
  306 template<
class S, 
class T, 
class NumericT>
 
  310                        const unsigned int num_threads_active,
 
  312                        T  *s_left_count, T *s_right_count,
 
  318                        unsigned int &compact_second_chunk,
 
  319                        T *s_compaction_list_exc,
 
  320                        unsigned int &is_active_second)
 
  324     if ((left_count != mid_count) && (mid_count != right_count))
 
  328         storeInterval(addr, s_left, s_right, s_left_count, s_right_count,
 
  329                       left, mid, left_count, mid_count, precision);
 
  333         is_active_second = 1;
 
  334         s_compaction_list_exc[threadIdx.x] = 1;
 
  335         compact_second_chunk = 1;
 
  343         is_active_second = 0;
 
  344         s_compaction_list_exc[threadIdx.x] = 0;
 
  347         if (left_count != mid_count)
 
  349             storeInterval(addr, s_left, s_right, s_left_count, s_right_count,
 
  350                           left, mid, left_count, mid_count, precision);
 
  354             storeInterval(addr, s_left, s_right, s_left_count, s_right_count,
 
  355                           mid, right, mid_count, right_count, precision);
 
  374                         unsigned int num_threads_compaction)
 
  377     unsigned int offset = 1;
 
  378     const unsigned int tid = threadIdx.x;
 
  383     for (
int d = (num_threads_compaction >> 1); d > 0; d >>= 1)
 
  391             unsigned int  ai = offset*(2*tid+1)-1;
 
  392             unsigned int  bi = offset*(2*tid+2)-1;
 
  394             s_compaction_list_exc[bi] =   s_compaction_list_exc[bi]
 
  395                                           + s_compaction_list_exc[ai];
 
  402     for (
int d = 2; d < num_threads_compaction; d <<= 1)
 
  411             unsigned int  ai = offset*(tid+1) - 1;
 
  412             unsigned int  bi = ai + (offset >> 1);
 
  414             s_compaction_list_exc[bi] =   s_compaction_list_exc[bi]
 
  415                                           + s_compaction_list_exc[ai];
 
  437 template<
class T, 
class NumericT>
 
  441                  T *s_left_count, T *s_right_count,
 
  443                  unsigned int mid_count, 
unsigned int right_count,
 
  444                  T *s_compaction_list,
 
  445                  unsigned int num_threads_active,
 
  446                  unsigned int is_active_second)
 
  448     const unsigned int tid = threadIdx.x;
 
  452     if ((tid < num_threads_active) && (1 == is_active_second))
 
  454         unsigned int addr_w = num_threads_active + s_compaction_list[tid];
 
  455         s_left[addr_w] = mid;
 
  456         s_right[addr_w] = right;
 
  457         s_left_count[addr_w] = mid_count;
 
  458         s_right_count[addr_w] = right_count;
 
  462 template<
class T, 
class S, 
class NumericT>
 
  466                        T *s_left_count, T *s_right_count,
 
  468                        S &left_count, S &mid_count, S &right_count,
 
  469                        T *s_compaction_list_exc,
 
  470                        unsigned int &compact_second_chunk,
 
  471                        const unsigned int num_threads_active,
 
  472                        unsigned int &is_active_second)
 
  474     const unsigned int tid = threadIdx.x;
 
  475     const unsigned int multiplicity = right_count - left_count;
 
  477     if (1 == multiplicity)
 
  482         s_right[tid] = right;
 
  483         s_left_count[tid] = left_count;
 
  484         s_right_count[tid] = right_count;
 
  488         is_active_second = 0;
 
  489         s_compaction_list_exc[tid] = 0;
 
  495         mid_count = left_count + (multiplicity >> 1);
 
  499         s_right[tid] = right;
 
  500         s_left_count[tid] = left_count;
 
  501         s_right_count[tid] = mid_count;
 
  505         is_active_second = 1;
 
  506         s_compaction_list_exc[tid] = 1;
 
  507         compact_second_chunk = 1;
 
  526 template<
class T, 
class NumericT>
 
  531                         T *s_left_count, T *s_right_count,
 
  532                         const unsigned int num_threads_active,
 
  534                         unsigned int &left_count, 
unsigned int &right_count,
 
  535                         NumericT &mid, 
unsigned int &all_threads_converged)
 
  538   if (tid < num_threads_active)
 
  542     right = s_right[tid];
 
  543     left_count = s_left_count[tid];
 
  544     right_count = s_right_count[tid];
 
  551       all_threads_converged = 0;
 
  553     else if ((right_count - left_count) > 1)
 
  557       all_threads_converged = 0;
 
  579 template<
class T, 
class NumericT>
 
  584                         T *s_left_count, T *s_right_count,
 
  585                         const unsigned int num_threads_active,
 
  587                         unsigned int &left_count, 
unsigned int &right_count,
 
  588                         NumericT &mid, 
unsigned int &all_threads_converged)
 
  591   if (tid < num_threads_active)
 
  595     right = s_right[tid];
 
  596     left_count = s_left_count[tid];
 
  597     right_count = s_right_count[tid];
 
  604       all_threads_converged = 0;
 
  612 #endif // #ifndef VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_ 
__device__ void createIndicesCompaction(T *s_compaction_list_exc, unsigned int num_threads_compaction)
__device__ unsigned int computeNumSmallerEigenvals(const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
__device__ void storeNonEmptyIntervals(unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
Store all non-empty intervals resulting from the subdivision of the interval currently processed by t...
#define VIENNACL_BISECT_MIN_ABS_INTERVAL
__device__ int floorPow2(int n)
__device__ int ceilPow2(int n)
Global configuration parameters. 
__device__ void compactIntervals(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
Perform stream compaction for second child intervals. 
__device__ void storeInterval(unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision)
__device__ void subdivideActiveInterval(const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
Subdivide interval if active and not already converged. 
__device__ void subdivideActiveIntervalMulti(const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
Subdivide interval if active and not already converged. 
__device__ NumericT computeMidpoint(const NumericT left, const NumericT right)
__device__ void storeIntervalConverged(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)
float sign_f(const float &val)
Sign of number (float) 
NumericT max(std::vector< NumericT > const &v1)
__device__ unsigned int computeNumSmallerEigenvalsLarge(const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
NumericT min(std::vector< NumericT > const &v1)