1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP 
    2 #define VIENNACL_LINALG_OPENCL_KERNELS_AMG_HPP 
   23 template<
typename StringT>
 
   27  source.append(
"__kernel void amg_influence_trivial( \n");
 
   28  source.append(
"  __global const unsigned int * A_row_indices, \n");
 
   29  source.append(
"  __global const unsigned int * A_col_indices, \n");
 
   30  source.append(
"  unsigned int A_size1, \n");
 
   31  source.append(
"  unsigned int A_nnz, \n");
 
   32  source.append(
"  __global unsigned int * influences_row, \n");
 
   33  source.append(
"  __global unsigned int * influences_id, \n");
 
   34  source.append(
"  __global unsigned int * influences_values) { \n");
 
   36  source.append(
"  for (unsigned int i = get_global_id(0); i < A_size1; i += get_global_size(0)) \n");
 
   37  source.append(
"  { \n");
 
   38  source.append(
"    unsigned int tmp = A_row_indices[i]; \n");
 
   39  source.append(
"    influences_row[i] = tmp; \n");
 
   40  source.append(
"    influences_values[i] = A_row_indices[i+1] - tmp; \n");
 
   41  source.append(
"  } \n");
 
   43  source.append(
"  for (unsigned int i = get_global_id(0); i < A_nnz; i += get_global_size(0)) \n");
 
   44  source.append(
"    influences_id[i] = A_col_indices[i]; \n");
 
   46  source.append(
"  if (get_global_id(0) == 0) \n");
 
   47  source.append(
"    influences_row[A_size1] = A_row_indices[A_size1]; \n");
 
   48  source.append(
"} \n");
 
   53 template<
typename StringT>
 
   57  source.append(
"__kernel void amg_pmis2_init_workdata( \n");
 
   58  source.append(
"  __global unsigned int       *work_state, \n");
 
   59  source.append(
"  __global unsigned int       *work_random, \n");
 
   60  source.append(
"  __global unsigned int       *work_index, \n");
 
   61  source.append(
"  __global unsigned int const *point_types, \n");
 
   62  source.append(
"  __global unsigned int const *random_weights, \n");
 
   63  source.append(
"  unsigned int size) { \n");
 
   65  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
 
   66  source.append(
"    switch (point_types[i]) { \n");
 
   67  source.append(
"    case 0:  work_state[i] = 1; break; \n"); 
 
   68  source.append(
"    case 1:  work_state[i] = 2; break; \n"); 
 
   69  source.append(
"    case 2:  work_state[i] = 0; break; \n"); 
 
   71  source.append(
"    default: break; // do nothing \n");
 
   72  source.append(
"    } \n");
 
   74  source.append(
"    work_random[i] = random_weights[i]; \n");
 
   75  source.append(
"    work_index[i]  = i; \n");
 
   76  source.append(
"  } \n");
 
   77  source.append(
"} \n");
 
   82 template<
typename StringT>
 
   86  source.append(
"__kernel void amg_pmis2_max_neighborhood( \n");
 
   87  source.append(
"  __global unsigned int       *work_state, \n");
 
   88  source.append(
"  __global unsigned int       *work_random, \n");
 
   89  source.append(
"  __global unsigned int       *work_index, \n");
 
   90  source.append(
"  __global unsigned int       *work_state2, \n");
 
   91  source.append(
"  __global unsigned int       *work_random2, \n");
 
   92  source.append(
"  __global unsigned int       *work_index2, \n");
 
   93  source.append(
"  __global unsigned int const *influences_row, \n");
 
   94  source.append(
"  __global unsigned int const *influences_id, \n");
 
   95  source.append(
"  unsigned int size) { \n");
 
   97  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
 
  100  source.append(
"    unsigned int state  = work_state[i]; \n");
 
  101  source.append(
"    unsigned int random = work_random[i]; \n");
 
  102  source.append(
"    unsigned int index  = work_index[i]; \n");
 
  105  source.append(
"    unsigned int j_stop = influences_row[i + 1]; \n");
 
  106  source.append(
"    for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
 
  107  source.append(
"      unsigned int influenced_point_id = influences_id[j]; \n");
 
  110  source.append(
"      if (state < work_state[influenced_point_id]) { \n");
 
  111  source.append(
"        state  = work_state[influenced_point_id]; \n");
 
  112  source.append(
"        random = work_random[influenced_point_id]; \n");
 
  113  source.append(
"        index  = work_index[influenced_point_id]; \n");
 
  114  source.append(
"      } else if (state == work_state[influenced_point_id]) { \n");
 
  115  source.append(
"        if (random < work_random[influenced_point_id]) { \n");
 
  116  source.append(
"          state  = work_state[influenced_point_id]; \n");
 
  117  source.append(
"          random = work_random[influenced_point_id]; \n");
 
  118  source.append(
"          index  = work_index[influenced_point_id]; \n");
 
  119  source.append(
"        } else if (random == work_random[influenced_point_id]) { \n");
 
  120  source.append(
"          if (index < work_index[influenced_point_id]) { \n");
 
  121  source.append(
"            state  = work_state[influenced_point_id]; \n");
 
  122  source.append(
"            random = work_random[influenced_point_id]; \n");
 
  123  source.append(
"            index  = work_index[influenced_point_id]; \n");
 
  124  source.append(
"          } \n");
 
  125  source.append(
"        } \n");
 
  126  source.append(
"      } \n");
 
  128  source.append(
"    }\n"); 
 
  131  source.append(
"    work_state2[i]  = state; \n");
 
  132  source.append(
"    work_random2[i] = random; \n");
 
  133  source.append(
"    work_index2[i]  = index; \n");
 
  134  source.append(
"  } \n");
 
  135  source.append(
"} \n");
 
  140 template<
typename StringT>
 
  144  source.append(
"__kernel void amg_pmis2_mark_mis_nodes( \n");
 
  145  source.append(
"  __global unsigned int const *work_state, \n");
 
  146  source.append(
"  __global unsigned int const *work_index, \n");
 
  147  source.append(
"  __global unsigned int       *point_types, \n");
 
  148  source.append(
"  __global unsigned int       *undecided_buffer, \n");
 
  149  source.append(
"  unsigned int size) { \n");
 
  151  source.append(
"  unsigned int num_undecided = 0; \n");
 
  152  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
 
  153  source.append(
"    unsigned int max_state  = work_state[i]; \n");
 
  154  source.append(
"    unsigned int max_index  = work_index[i]; \n");
 
  156  source.append(
"    if (point_types[i] == 0) { \n");                     
 
  157  source.append(
"      if      (i == max_index) point_types[i] = 1; \n"); 
 
  158  source.append(
"      else if (max_state == 2) point_types[i] = 2; \n"); 
 
  159  source.append(
"      else                     num_undecided += 1; \n");
 
  160  source.append(
"    } \n");
 
  161  source.append(
"  } \n");
 
  164  source.append(
"  __local unsigned int shared_buffer[256]; \n");
 
  165  source.append(
"  shared_buffer[get_local_id(0)] = num_undecided; \n");
 
  166  source.append(
"  for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) { \n");
 
  167  source.append(
"    barrier(CLK_LOCAL_MEM_FENCE); \n");
 
  168  source.append(
"    if (get_local_id(0) < stride) shared_buffer[get_local_id(0)] += shared_buffer[get_local_id(0)+stride]; \n");
 
  169  source.append(
"  } \n");
 
  171  source.append(
"  if (get_local_id(0) == 0) \n");
 
  172  source.append(
"    undecided_buffer[get_group_id(0)] = shared_buffer[0]; \n");
 
  174  source.append(
"} \n");
 
  178 template<
typename StringT>
 
  182  source.append(
"__kernel void amg_pmis2_reset_state( \n");
 
  183  source.append(
"  __global unsigned int *point_types, \n");
 
  184  source.append(
"  unsigned int size) { \n");
 
  186  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n");
 
  187  source.append(
"    if (point_types[i] != 1) point_types[i] = 0;\n"); 
 
  188  source.append(
"  } \n");
 
  190  source.append(
"} \n");
 
  199 template<
typename StringT>
 
  203  source.append(
" __kernel void amg_agg_propagate_coarse_indices( \n");
 
  204  source.append(
"  __global unsigned int       *point_types, \n");
 
  205  source.append(
"  __global unsigned int       *coarse_ids, \n");
 
  206  source.append(
"  __global unsigned int const *influences_row, \n");
 
  207  source.append(
"  __global unsigned int const *influences_id, \n");
 
  208  source.append(
"  unsigned int size) { \n");
 
  210  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
 
  211  source.append(
"  { \n");
 
  212  source.append(
"    if (point_types[i] == 1) { \n"); 
 
  213  source.append(
"      unsigned int coarse_index = coarse_ids[i]; \n");
 
  215  source.append(
"      unsigned int j_stop = influences_row[i + 1]; \n");
 
  216  source.append(
"      for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
 
  217  source.append(
"        unsigned int influenced_point_id = influences_id[j]; \n");
 
  218  source.append(
"        coarse_ids[influenced_point_id] = coarse_index; \n");
 
  219  source.append(
"        if (influenced_point_id != i) point_types[influenced_point_id] = 2; \n"); 
 
  220  source.append(
"      } \n");
 
  221  source.append(
"    } \n");
 
  222  source.append(
"  } \n");
 
  223  source.append(
"} \n");
 
  229 template<
typename StringT>
 
  233  source.append(
" __kernel void amg_agg_merge_undecided( \n");
 
  234  source.append(
"  __global unsigned int       *point_types, \n");
 
  235  source.append(
"  __global unsigned int       *coarse_ids, \n");
 
  236  source.append(
"  __global unsigned int const *influences_row, \n");
 
  237  source.append(
"  __global unsigned int const *influences_id, \n");
 
  238  source.append(
"  unsigned int size) { \n");
 
  240  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
 
  241  source.append(
"  { \n");
 
  242  source.append(
"    if (point_types[i] == 0) { \n"); 
 
  244  source.append(
"      unsigned int j_stop = influences_row[i + 1]; \n");
 
  245  source.append(
"      for (unsigned int j = influences_row[i]; j < j_stop; ++j) { \n");
 
  246  source.append(
"        unsigned int influenced_point_id = influences_id[j]; \n");
 
  247  source.append(
"        if (point_types[influenced_point_id] != 0) { \n");       
 
  248  source.append(
"          coarse_ids[i] = coarse_ids[influenced_point_id]; \n");
 
  249  source.append(
"          break; \n");
 
  250  source.append(
"        } \n");
 
  251  source.append(
"      } \n");
 
  253  source.append(
"    } \n");
 
  254  source.append(
"  } \n");
 
  255  source.append(
"} \n");
 
  260 template<
typename StringT>
 
  264  source.append(
" __kernel void amg_agg_merge_undecided_2( \n");
 
  265  source.append(
"  __global unsigned int *point_types, \n");
 
  266  source.append(
"  unsigned int size) { \n");
 
  268  source.append(
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
 
  269  source.append(
"    if (point_types[i] == 0) point_types[i] = 2; \n"); 
 
  271  source.append(
"} \n");
 
  276 template<
typename StringT>
 
  280  source.append(
" __kernel void amg_interpol_ag( \n");
 
  281  source.append(
"  __global unsigned int * P_row_indices, \n");
 
  282  source.append(
"  __global unsigned int * P_column_indices, \n");
 
  283  source.append(
"  __global "); source.append(numeric_string); source.append(
" * P_elements, \n");
 
  284  source.append(
"  __global const unsigned int * coarse_agg_ids, \n");
 
  285  source.append(
"  unsigned int size) { \n");
 
  287  source.append(
"   for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n");
 
  288  source.append(
"   { \n");
 
  289  source.append(
"     P_row_indices[i] = i; \n");
 
  290  source.append(
"     P_column_indices[i] = coarse_agg_ids[i]; \n");
 
  291  source.append(
"     P_elements[i] = 1; \n");
 
  292  source.append(
"   } \n");
 
  293  source.append(
"   if (get_global_id(0) == 0) P_row_indices[size] = size; \n");
 
  294  source.append(
"  } \n");
 
  298 template<
typename StringT>
 
  302  source.append(
"__kernel void amg_interpol_sa( \n");
 
  303  source.append(
" __global unsigned int const *A_row_indices, \n");
 
  304  source.append(
" __global unsigned int const *A_col_indices, \n");
 
  305  source.append(
" __global "); source.append(numeric_string); source.append(
" const *A_elements, \n");
 
  306  source.append(
" unsigned int A_size1, \n");
 
  307  source.append(
" unsigned int A_nnz, \n");
 
  308  source.append(
" __global unsigned int *Jacobi_row_indices, \n");
 
  309  source.append(
" __global unsigned int *Jacobi_col_indices, \n");
 
  310  source.append(
" __global "); source.append(numeric_string); source.append(
" *Jacobi_elements, \n");
 
  311  source.append(
" "); source.append(numeric_string); source.append(
" omega) { \n");
 
  313  source.append(
"  for (unsigned int row = get_global_id(0); row < A_size1; row += get_global_size(0)) \n");
 
  314  source.append(
"  { \n");
 
  315  source.append(
"    unsigned int row_begin = A_row_indices[row]; \n");
 
  316  source.append(
"    unsigned int row_end   = A_row_indices[row+1]; \n");
 
  318  source.append(
"    Jacobi_row_indices[row] = row_begin; \n");
 
  321  source.append(
"    "); source.append(numeric_string); source.append(
" diag = 0; \n");
 
  322  source.append(
"    for (unsigned int j = row_begin; j < row_end; ++j) { \n");
 
  323  source.append(
"      if (A_col_indices[j] == row) { \n");
 
  324  source.append(
"        diag = A_elements[j]; \n");
 
  325  source.append(
"        break; \n");
 
  326  source.append(
"      } \n");
 
  327  source.append(
"    } \n");
 
  330  source.append(
"    for (unsigned int j = row_begin; j < row_end; ++j) { \n");
 
  331  source.append(
"      unsigned int col_index = A_col_indices[j]; \n");
 
  332  source.append(
"      Jacobi_col_indices[j] = col_index; \n");
 
  333  source.append(
"      Jacobi_elements[j] = (col_index == row) ? (1 - omega) : (-omega * A_elements[j] / diag); \n");
 
  334  source.append(
"    } \n");
 
  336  source.append(
"  } \n");
 
  337  source.append(
"  if (get_global_id(0) == 0) Jacobi_row_indices[A_size1] = A_nnz; \n");
 
  338  source.append(
"} \n");
 
  345 template<
typename NumericT>
 
  355     static std::map<cl_context, bool> init_done;
 
  362       source.reserve(2048);
 
  364       viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
 
  379       #ifdef VIENNACL_BUILD_INFO 
  380       std::cout << 
"Creating program " << prog_name << std::endl;
 
  382       ctx.add_program(source, prog_name);
 
  383       init_done[ctx.handle().get()] = 
true;
 
Main kernel class for generating OpenCL kernels for compressed_matrix. 
void generate_amg_pmis2_reset_state(StringT &source)
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
void generate_amg_pmis2_init_workdata(StringT &source)
Provides OpenCL-related utilities. 
const viennacl::ocl::handle< cl_context > & handle() const 
Returns the context handle. 
Common implementations shared by OpenCL-based operations. 
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const 
void generate_amg_pmis2_mark_mis_nodes(StringT &source)
void generate_amg_agg_merge_undecided_2(StringT &source)
static std::string program_name()
void generate_amg_influence_trivial(StringT &source)
static void init(viennacl::ocl::context &ctx)
Representation of an OpenCL kernel in ViennaCL. 
void generate_amg_pmis2_max_neighborhood(StringT &source)
void generate_amg_agg_merge_undecided(StringT &source)
Helper class for converting a type to its string representation. 
void generate_amg_interpol_ag(StringT &source, std::string const &numeric_string)
void generate_amg_interpol_sa(StringT &source, std::string const &numeric_string)
void generate_amg_agg_propagate_coarse_indices(StringT &source)