1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SLICED_ELL_MATRIX_HPP 
    2 #define VIENNACL_LINALG_OPENCL_KERNELS_SLICED_ELL_MATRIX_HPP 
   41 template<
typename StringT>
 
   45     source.append(
"__kernel void vec_mul_alpha_beta( \n");
 
   47     source.append(
"__kernel void vec_mul( \n");
 
   48   source.append(
"  __global const unsigned int * columns_per_block, \n");
 
   49   source.append(
"  __global const unsigned int * column_indices, \n");
 
   50   source.append(
"  __global const unsigned int * block_start, \n");
 
   51   source.append(
"  __global const "); source.append(numeric_string); source.append(
" * elements, \n");
 
   52   source.append(
"  __global const "); source.append(numeric_string); source.append(
" * x, \n");
 
   53   source.append(
"  uint4 layout_x, \n");
 
   54   if (with_alpha_beta) { source.append(
"  "); source.append(numeric_string); source.append(
" alpha, \n"); }
 
   55   source.append(
"  __global "); source.append(numeric_string); source.append(
" * result, \n");
 
   56   source.append(
"  uint4 layout_result, \n");
 
   57   if (with_alpha_beta) { source.append(
"  "); source.append(numeric_string); source.append(
" beta, \n"); }
 
   58   source.append(
"  unsigned int block_size) \n");
 
   59   source.append(
"{ \n");
 
   60   source.append(
"  uint blocks_per_workgroup = get_local_size(0) / block_size; \n");
 
   61   source.append(
"  uint id_in_block = get_local_id(0) % block_size; \n");
 
   62   source.append(
"  uint num_blocks  = (layout_result.z - 1) / block_size + 1; \n");
 
   63   source.append(
"  uint global_warp_count  = blocks_per_workgroup * get_num_groups(0); \n");
 
   64   source.append(
"  uint global_warp_id     = blocks_per_workgroup * get_group_id(0) + get_local_id(0) / block_size; \n");
 
   66   source.append(
"  for (uint block_idx = global_warp_id; block_idx < num_blocks; block_idx += global_warp_count) { \n");
 
   67   source.append(
"    "); source.append(numeric_string); source.append(
" sum = 0; \n");
 
   69   source.append(
"    uint row    = block_idx * block_size + id_in_block; \n");
 
   70   source.append(
"    uint offset = block_start[block_idx]; \n");
 
   71   source.append(
"    uint num_columns = columns_per_block[block_idx]; \n");
 
   72   source.append(
"    for (uint item_id = 0; item_id < num_columns; item_id++) { \n");
 
   73   source.append(
"      uint index = offset + item_id * block_size + id_in_block; \n");
 
   74   source.append(
"      "); source.append(numeric_string); source.append(
" val = elements[index]; \n");
 
   75   source.append(
"      sum += (val != 0) ? (x[column_indices[index] * layout_x.y + layout_x.x] * val) : 0; \n");
 
   76   source.append(
"    } \n");
 
   78   source.append(
"    if (row < layout_result.z) \n");
 
   80     source.append(
"      result[row * layout_result.y + layout_result.x] = alpha * sum + ((beta != 0) ? beta * result[row * layout_result.y + layout_result.x] : 0); \n");
 
   82     source.append(
"      result[row * layout_result.y + layout_result.x] = sum; \n");
 
   83   source.append(
"  } \n");
 
   84   source.append(
"} \n");
 
   92 template<
typename NumericT, 
typename IndexT>
 
   95 template<
typename NumericT>
 
  105     static std::map<cl_context, bool> init_done;
 
  112       source.reserve(1024);
 
  114       viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
 
  120       std::string prog_name = program_name();
 
  121       #ifdef VIENNACL_BUILD_INFO 
  122       std::cout << 
"Creating program " << prog_name << std::endl;
 
  124       ctx.add_program(source, prog_name);
 
  125       init_done[ctx.handle().get()] = 
true;
 
static void init(viennacl::ocl::context &ctx)
static std::string program_name()
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Provides OpenCL-related utilities. 
const viennacl::ocl::handle< cl_context > & handle() const 
Returns the context handle. 
Common implementations shared by OpenCL-based operations. 
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const 
Main kernel class for generating OpenCL kernels for sliced_ell_matrix. 
void generate_sliced_ell_vec_mul(StringT &source, std::string const &numeric_string, bool with_alpha_beta)
Representation of an OpenCL kernel in ViennaCL. 
Helper class for converting a type to its string representation.