1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_SOLVE_HPP 
    2 #define VIENNACL_LINALG_OPENCL_KERNELS_MATRIX_SOLVE_HPP 
   39 template<
typename StringT>
 
   41                                  bool row_major_A, 
bool row_major_B,
 
   42                                  bool upper_solve, 
bool unit_diagonal)
 
   45   source.append(
"__kernel void ");
 
   47     source.append(
"unit_");
 
   49     source.append(
"upper_");
 
   51     source.append(
"lower_");
 
   52   source.append(
"solve");
 
   54   source.append(
"( \n");
 
   55   source.append(
"  __global const "); source.append(numeric_string); source.append(
" * A, \n");
 
   56   source.append(
"  unsigned int A_start1, unsigned int A_start2, \n");
 
   57   source.append(
"  unsigned int A_inc1,   unsigned int A_inc2, \n");
 
   58   source.append(
"  unsigned int A_size1,  unsigned int A_size2, \n");
 
   59   source.append(
"  unsigned int A_internal_size1, unsigned int A_internal_size2, \n");
 
   60   source.append(
"  __global "); source.append(numeric_string); source.append(
" * B, \n");
 
   61   source.append(
"  unsigned int B_start1, unsigned int B_start2, \n");
 
   62   source.append(
"  unsigned int B_inc1,   unsigned int B_inc2, \n");
 
   63   source.append(
"  unsigned int B_size1,  unsigned int B_size2, \n");
 
   64   source.append(
"  unsigned int B_internal_size1, unsigned int B_internal_size2) { \n");
 
   65   source.append(
"  "); source.append(numeric_string); source.append(
" temp;  \n");
 
   69     source.append(
"  for (unsigned int row_cnt = 0; row_cnt < A_size1; ++row_cnt)  \n");
 
   70     source.append(
"  {  \n");
 
   71     source.append(
"    unsigned int row = A_size1 - 1 - row_cnt; \n");
 
   75     source.append(
"  for (unsigned int row = 0; row < A_size1; ++row) \n");
 
   76     source.append(
"  { \n");
 
   81     source.append(
"    barrier(CLK_GLOBAL_MEM_FENCE); \n");
 
   82     source.append(
"    if (get_local_id(0) == 0)  \n");
 
   85       source.append(
"      B[(row * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)] /= ");
 
   87       source.append(
"      B[(row * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1] /= ");
 
   90       source.append(
"A[(row * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]; \n");
 
   92       source.append(
"A[(row * A_inc1 + A_start1) + (row * A_inc2 + A_start2)*A_internal_size1]; \n");
 
   95   source.append(
"    barrier(CLK_GLOBAL_MEM_FENCE); \n");
 
   98     source.append(
"    temp = B[(row * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)]; \n");
 
  100     source.append(
"    temp = B[(row * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1]; \n");
 
  102   source.append(
"    //eliminate column of op(A) with index 'row' in parallel: \n");
 
  104     source.append(
"    for  (unsigned int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n");
 
  106     source.append(
"    for  (unsigned int elim = row + get_local_id(0) + 1; elim < A_size1; elim += get_local_size(0)) \n");
 
  109     source.append(
"      B[(elim * B_inc1 + B_start1) * B_internal_size2 + (get_group_id(0) * B_inc2 + B_start2)] -= temp * ");
 
  111     source.append(
"      B[(elim * B_inc1 + B_start1) + (get_group_id(0) * B_inc2 + B_start2) * B_internal_size1] -= temp * ");
 
  114     source.append(
"A[(elim * A_inc1 + A_start1) * A_internal_size2 + (row * A_inc2 + A_start2)]; \n");
 
  116     source.append(
"A[(elim * A_inc1 + A_start1) + (row * A_inc2 + A_start2) * A_internal_size1]; \n");
 
  118   source.append(
"   } \n");
 
  119   source.append(
"} \n");
 
  129 template<
typename NumericT, 
typename LayoutT1, 
typename LayoutT2>
 
  139     static std::map<cl_context, bool> init_done;
 
  148       source.reserve(8192);
 
  150       viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
 
  153       if (numeric_string == 
"float" || numeric_string == 
"double")
 
  166       #ifdef VIENNACL_BUILD_INFO 
  167       std::cout << 
"Creating program " << prog_name << std::endl;
 
  169       ctx.add_program(source, prog_name);
 
  170       init_done[ctx.handle().get()] = 
true;
 
Helper class for checking whether a matrix has a row-major layout. 
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Provides OpenCL-related utilities. 
static std::string program_name()
const viennacl::ocl::handle< cl_context > & handle() const 
Returns the context handle. 
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const 
void generate_matrix_solve_blas3(StringT &source, std::string const &numeric_string, bool row_major_A, bool row_major_B, bool upper_solve, bool unit_diagonal)
Main kernel class for the generation of matrix solve kernels. 
Representation of an OpenCL kernel in ViennaCL. 
std::string type_to_string(viennacl::row_major)
static void init(viennacl::ocl::context &ctx)
Helper class for converting a type to its string representation. 
Runtime generation of OpenCL kernels for matrix operations.