1 #ifndef VIENNACL_LINALG_CUDA_BISECT_KERNEL_SMALL_HPP_ 
    2 #define VIENNACL_LINALG_CUDA_BISECT_KERNEL_SMALL_HPP_ 
   58 template<
typename NumericT>
 
   63              unsigned int *g_left_count, 
unsigned int *g_right_count,
 
   65              const unsigned int lg_eig_count, 
const unsigned int ug_eig_count,
 
   80     __shared__  
unsigned int 
   86     __shared__  
unsigned int compact_second_chunk;
 
   87     __shared__  
unsigned int all_threads_converged;
 
   90     __shared__  
unsigned int num_threads_active;
 
   93     __shared__  
unsigned int num_threads_compaction;
 
   96     unsigned int *s_compaction_list_exc = s_compaction_list + 1;
 
  103     unsigned int left_count = 0;
 
  104     unsigned int right_count = 0;
 
  108     unsigned int mid_count = 0;
 
  110     unsigned int  is_active_second = 0;
 
  112     s_compaction_list[threadIdx.x] = 0;
 
  113     s_left[threadIdx.x] = 0;
 
  114     s_right[threadIdx.x] = 0;
 
  115     s_left_count[threadIdx.x] = 0;
 
  116     s_right_count[threadIdx.x] = 0;
 
  121     if (0 == threadIdx.x)
 
  125         s_left_count[0] = lg_eig_count;
 
  126         s_right_count[0] = ug_eig_count;
 
  128         compact_second_chunk = 0;
 
  129         num_threads_active = 1;
 
  131         num_threads_compaction = 1;
 
  139         all_threads_converged = 1;
 
  142         is_active_second = 0;
 
  144                                 s_left, s_right, s_left_count, s_right_count,
 
  146                                 left, right, left_count, right_count,
 
  147                                 mid, all_threads_converged);
 
  152         if (1 == all_threads_converged)
 
  165                                                threadIdx.x, num_threads_active,
 
  181         if (threadIdx.x < num_threads_active)
 
  189                                        s_left, s_right, s_left_count, s_right_count,
 
  191                                        left_count, mid_count, right_count,
 
  192                                        epsilon, compact_second_chunk,
 
  193                                        s_compaction_list_exc,
 
  201                                        left_count, mid_count, right_count,
 
  202                                        s_compaction_list_exc, compact_second_chunk,
 
  214         if (compact_second_chunk > 0)
 
  220                              mid, right, mid_count, right_count,
 
  221                              s_compaction_list, num_threads_active,
 
  227         if (0 == threadIdx.x)
 
  231             num_threads_active += s_compaction_list[num_threads_active];
 
  233             num_threads_compaction = 
ceilPow2(num_threads_active);
 
  235             compact_second_chunk = 0;
 
  253         g_left[threadIdx.x]  = s_left[threadIdx.x];
 
  255         g_left_count[threadIdx.x]  = s_left_count[threadIdx.x];
 
  261 #endif // #ifndef _BISECT_KERNEL_SMALL_H_ 
__global__ void bisectKernelSmall(const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon)
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. 
__device__ void createIndicesCompaction(T *s_compaction_list_exc, unsigned int num_threads_compaction)
__device__ unsigned int computeNumSmallerEigenvals(const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged)
__device__ void storeNonEmptyIntervals(unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second)
Store all non-empty intervals resulting from the subdivision of the interval currently processed by t...
__device__ int ceilPow2(int n)
Global configuration parameters. 
__device__ void compactIntervals(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second)
Perform stream compaction for second child intervals. 
#define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX
__device__ void subdivideActiveIntervalMulti(const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged)
Subdivide interval if active and not already converged. 
__device__ void storeIntervalConverged(NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second)